diff --git common/src/java/org/apache/hive/common/util/DateUtils.java common/src/java/org/apache/hive/common/util/DateUtils.java index 959a542..a1068ec 100644 --- common/src/java/org/apache/hive/common/util/DateUtils.java +++ common/src/java/org/apache/hive/common/util/DateUtils.java @@ -54,4 +54,24 @@ public static int parseNumericValueWithRange(String fieldName, } return result; } + + // From java.util.Calendar + private static final String[] FIELD_NAME = { + "ERA", "YEAR", "MONTH", "WEEK_OF_YEAR", "WEEK_OF_MONTH", "DAY_OF_MONTH", + "DAY_OF_YEAR", "DAY_OF_WEEK", "DAY_OF_WEEK_IN_MONTH", "AM_PM", "HOUR", + "HOUR_OF_DAY", "MINUTE", "SECOND", "MILLISECOND", "ZONE_OFFSET", + "DST_OFFSET" + }; + + /** + * Returns the name of the specified calendar field. + * + * @param field the calendar field + * @return the calendar field name + * @exception IndexOutOfBoundsException if field is negative, + * equal to or greater than FIELD_COUNT. + */ + public static String getFieldName(int field) { + return FIELD_NAME[field]; + } } diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 15e0db4..3b4bb6d 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -517,7 +517,6 @@ minillaplocal.query.files=acid_globallimit.q,\ multiMapJoin2.q,\ non_native_window_udf.q,\ orc_analyze.q,\ - orc_llap.q,\ orc_llap_nonvector.q,\ orc_ppd_date.q,\ orc_ppd_decimal.q,\ diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt index e52fcc0..d8164a4 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt @@ -157,7 +157,12 @@ public class extends VectorExpression { public void setOutputColumn(int outputColumn) { this.outputColumn = outputColumn; } - + + @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt index e1df589..31a015f 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt @@ -167,6 +167,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnWithConvert.txt deleted file mode 100644 index bcd10a2..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnWithConvert.txt +++ /dev/null @@ -1,173 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership.
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; -import org.apache.hadoop.hive.ql.exec.vector.*; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; - -/** - * Generated from template ColumnArithmeticColumnWithConvert.txt, which covers binary arithmetic - * expressions between columns. - */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum1; - private int colNum2; - private int outputColumn; - - public (int colNum1, int colNum2, int outputColumn) { - this.colNum1 = colNum1; - this.colNum2 = colNum2; - this.outputColumn = outputColumn; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - inputColVector1 = () batch.cols[colNum1]; - inputColVector2 = () batch.cols[colNum2]; - outputColVector = () batch.cols[outputColumn]; - int[] sel = batch.selected; - int n = batch.size; - [] vector1 = inputColVector1.vector; - [] vector2 = inputColVector2.vector; - [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first - NullUtil.propagateNullsColCol( - inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); - - /* Disregard nulls for processing. In other words, - * the arithmetic operation is performed even if one or - * more inputs are null. This is to improve speed by avoiding - * conditional checks in the inner loop. 
- */ - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - outputVector[0] = (vector1[0]) (vector2[0]); - } else if (inputColVector1.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = (vector1[0]) (vector2[i]); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = (vector1[0]) (vector2[i]); - } - } - } else if (inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = (vector1[i]) (vector2[0]); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = (vector1[i]) (vector2[0]); - } - } - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = (vector1[i]) (vector2[i]); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = (vector1[i]) (vector2[i]); - } - } - } - - /* For the case when the output can have null values, follow - * the convention that the data values must be 1 for long and - * NaN for double. This is to prevent possible later zero-divide errors - * in complex arithmetic expressions like col2 / (col1 - 1) - * in the case when some col1 entries are null. - */ - NullUtil.setNullDataEntries(outputColVector, batch.selectedInUse, sel, n); - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override - public String getOutputType() { - return ""; - } - - public int getColNum1() { - return colNum1; - } - - public void setColNum1(int colNum1) { - this.colNum1 = colNum1; - } - - public int getColNum2() { - return colNum2; - } - - public void setColNum2(int colNum2) { - this.colNum2 = colNum2; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType(""), - VectorExpressionDescriptor.ArgumentType.getType("")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} - diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt index 87335f1..2cc1aa2 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt @@ -134,6 +134,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt index 0bb1532..294bb4f 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt @@ -132,6 +132,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value.toString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { 
return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarWithConvert.txt deleted file mode 100644 index 105eb92..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarWithConvert.txt +++ /dev/null @@ -1,150 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.; -import org.apache.hadoop.hive.ql.exec.vector.; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.*; - -/** - * Generated from template ColumnArithmeticScalarWithConvert.txt, which covers binary arithmetic - * expressions between a column and a scalar. - */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum; - private value; - private int outputColumn; - - public (int colNum, value, int outputColumn) { - this.colNum = colNum; - this.value = (value); - this.outputColumn = outputColumn; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - inputColVector = () batch.cols[colNum]; - outputColVector = () batch.cols[outputColumn]; - int[] sel = batch.selected; - boolean[] inputIsNull = inputColVector.isNull; - boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; - [] vector = inputColVector.vector; - [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - if (inputColVector.isRepeating) { - outputVector[0] = (vector[0]) value; - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = (vector[i]) value; - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = (vector[i]) value; - } - } - } else /* there are nulls */ { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = (vector[i]) value; - outputIsNull[i] = inputIsNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = (vector[i]) value; - } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); - } - } - - NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override - public String getOutputType() { - return ""; - } - - public int getColNum() { - return colNum; - } - - public void setColNum(int colNum) { - this.colNum = colNum; - } - - public getValue() { - return value; - } - - public void setValue( value) { - this.value = value; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType(""), - VectorExpressionDescriptor.ArgumentType.getType("")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt index f2b4c81..cbc97da 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt @@ -157,6 +157,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt index 2438ee4..6568d1c 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt @@ -149,6 +149,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt index b0f6eb1..04b533a 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt @@ -184,6 +184,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new 
VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt index 623bcfb..68c4f58 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt @@ -139,6 +139,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt index c6614ab..25e0d85 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt @@ -139,6 +139,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt index 841ef93..0728f6c 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt @@ -138,6 +138,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value.toString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt index cf690db..efbf1ba 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt @@ -122,6 +122,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt index b52b7c7..6574267 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt @@ -124,6 +124,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt index c3d8d7e..2a9f947 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt +++ 
ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt @@ -181,6 +181,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt index d1474fb..4bbc358 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt @@ -141,6 +141,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt index 63cebaf..2e66b3a 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt @@ -170,6 +170,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt index 7aee529..e679449 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt @@ -139,6 +139,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value.toString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt index c68ac34..e23dc27 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt @@ -155,6 +155,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt index cb6b750..85d88fd 100644 --- 
ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt @@ -146,6 +146,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt index 619015e..0b7fefc 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt @@ -119,6 +119,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt index 7c5615d..aabd20f 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt @@ -173,7 +173,12 @@ public class extends VectorExpression { public void setRightValue( value) { this.rightValue = value; } - + + @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", left " + leftValue + ", right " + rightValue; + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt index e25b9c2..ee80606 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt @@ -33,8 +33,8 @@ public class extends VectorExpression { private static final long serialVersionUID = 1L; - private int colNum1; - private int colNum2; + protected int colNum1; + protected int colNum2; public (int colNum1, int colNum2) { this.colNum1 = colNum1; @@ -182,6 +182,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt index b0f6e5c..248a66a 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt @@ -32,8 +32,8 @@ public class extends VectorExpression { private static final long serialVersionUID = 1L; - private int colNum; - private value; + protected int colNum; + protected value; public (int colNum, value) { this.colNum = colNum; @@ -158,6 +158,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public 
VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt index 62d2254..312be49 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt @@ -171,6 +171,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", left " + leftValue.toString() + ", right " + rightValue.toString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt index a2352c6..ee450d3 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt @@ -430,6 +430,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt index bdd39b9..9943f45 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt @@ -145,6 +145,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value.toString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt index 0608016..4477aff 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt @@ -145,6 +145,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value.toString() + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt index 57caf7e..610c062 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt @@ -170,6 +170,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + 
colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt index f5f59c2..73c46a1 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt @@ -154,6 +154,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt index b7544c7..037382c 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt @@ -32,8 +32,8 @@ public class extends VectorExpression { private static final long serialVersionUID = 1L; - private int colNum; - private value; + protected int colNum; + protected value; public ( value, int colNum) { this.colNum = colNum; @@ -158,6 +158,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt index 16d4aaf..0cfc201 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -176,7 +178,12 @@ public class extends VectorExpression { public void setRightValue(byte[] value) { this.right = value; } - + + @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", left " + new String(left, StandardCharsets.UTF_8) + ", right " + new String(right, StandardCharsets.UTF_8); + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt index a72b882..9114932 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt @@ -477,6 +477,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + 
+ @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt index 8b1c366..b56d451 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -152,4 +154,9 @@ public abstract class extends VectorExpression { this.value = value; } + @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + new String(value, StandardCharsets.UTF_8); + } + } \ No newline at end of file diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt index 930069c..4fb5035 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -155,4 +157,10 @@ public abstract class extends VectorExpression { public void setValue(byte[] value) { this.value = value; } + + @Override + public String vectorExpressionParameters() { + return "val " + new String(value, StandardCharsets.UTF_8) + ", col " + + colNum; + } + } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt index 806148f..9c268e2 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt @@ -170,6 +170,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", left " + leftValue.toString() + ", right " + rightValue.toString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt index d10be96..8873826 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt @@ -167,6 +167,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", 
col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt index 31c3f6b..8583eee 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt @@ -150,6 +150,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt index 31dce1c..eeb73c9 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt @@ -436,6 +436,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt index bab8508..23790a5 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt @@ -147,6 +147,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value.toString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareLongDoubleColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareLongDoubleColumn.txt index 5e418de..0e10779 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareLongDoubleColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareLongDoubleColumn.txt @@ -43,6 +43,11 @@ public class extends { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt index ff5d11e..5a6def3 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt @@ -149,6 +149,11 @@ public class extends VectorExpression 
{ } @Override + public String vectorExpressionParameters() { + return "val " + value.toString() + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt index d350dcb..f9bdbe0 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.common.type.; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; @@ -178,7 +180,13 @@ public class extends VectorExpression { public void setRightValue(byte[] value) { this.right = value; } - + + @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", left " + new String(left, StandardCharsets.UTF_8) + + ", right " + new String(right, StandardCharsets.UTF_8); + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt index 3a75a26..9f4bb75 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt @@ -160,6 +160,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + arg1Column + ", col " + arg2Column + ", val "+ arg3Scalar; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt index 648b776..487d894 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt @@ -162,6 +162,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + arg1Column + ", val "+ arg2Scalar + ", col "+ arg3Column; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt index def9863..5651d15 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt @@ -147,6 +147,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + arg1Column + ", val "+ arg2Scalar + ", val "+ arg3Scalar; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt index 8e3a419..49a1950 100644 --- 
ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt @@ -180,6 +180,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt index ad65d52..283352d 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt @@ -141,6 +141,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value.toString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt index 858c3d7..9eba829 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt @@ -170,6 +170,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt index 66fffd2..9a06822 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt @@ -139,6 +139,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value.toString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt index ddde913..a5d9877 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt @@ -155,6 +155,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value.toString() + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git 
ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt index cbb7021..9a0d397 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt @@ -143,6 +143,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value.toString() + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt index 9ccfaac..cff2deb 100644 --- ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt @@ -139,6 +139,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt index c7d8c65..8308a30 100644 --- ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt @@ -129,6 +129,10 @@ public class extends VectorExpression { return "long"; } + @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { diff --git ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt index d47bc10..6aa30e4 100644 --- ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt @@ -129,6 +129,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt index 4fcbdc0..8473599 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt @@ -146,6 +146,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git 
ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt index ea55bec..d3fd9bd 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt @@ -129,6 +129,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value.toString() + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnWithConvert.txt deleted file mode 100644 index 91887c8..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnWithConvert.txt +++ /dev/null @@ -1,163 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.*; - - -/* - * Because of the templatized nature of the code, either or both - * of these ColumnVector imports may be needed. Listing both of them - * rather than using ....vectorization.*; - */ -import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; - -/** - * Generated from template ScalarArithmeticColumnWithConvert.txt. - * Implements a vectorized arithmetic operator with a scalar on the left and a - * column vector on the right. The result is output to an output column vector. - */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum; - private value; - private int outputColumn; - - public ( value, int colNum, int outputColumn) { - this.colNum = colNum; - this.value = (value); - this.outputColumn = outputColumn; - } - - public () { - } - - @Override - /** - * Method to evaluate scalar-column operation in vectorized fashion. 
- * - * @batch a package of rows with each column stored in a vector - */ - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - inputColVector = () batch.cols[colNum]; - outputColVector = () batch.cols[outputColumn]; - int[] sel = batch.selected; - boolean[] inputIsNull = inputColVector.isNull; - boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; - [] vector = inputColVector.vector; - [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - if (inputColVector.isRepeating) { - outputVector[0] = value (vector[0]); - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value (vector[i]); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = value (vector[i]); - } - } - } else { /* there are nulls */ - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value (vector[i]); - outputIsNull[i] = inputIsNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = value (vector[i]); - } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); - } - } - - NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override - public String getOutputType() { - return ""; - } - - public int getColNum() { - return colNum; - } - - public void setColNum(int colNum) { - this.colNum = colNum; - } - - public getValue() { - return value; - } - - public void setValue( value) { - this.value = value; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType(""), - VectorExpressionDescriptor.ArgumentType.getType("")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.SCALAR, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt index e6e59f5..6f9e2e2 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt @@ -149,6 +149,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt index f8a8457..8e6e8a9 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt @@ -162,6 +162,11 @@ public class extends 
VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt index c8a5d17..1014978 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt @@ -131,6 +131,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value.toString() + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt index e881037..747f707 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt @@ -493,6 +493,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt index 92bf27a..d9530d6 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -155,4 +157,8 @@ public abstract class extends VectorExpression { this.outputColumn = outputColumn; } + @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + new String(value, StandardCharsets.UTF_8); + } } \ No newline at end of file diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareTruncStringScalar.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareTruncStringScalar.txt index 8a92f54..8e36fc0 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareTruncStringScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareTruncStringScalar.txt @@ -20,6 +20,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.; +import java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.common.type.; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; @@ -44,6 +46,11 @@ public class extends { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", 
val " + new String(value, StandardCharsets.UTF_8); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt index 238dc93..5eed703 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -154,4 +156,9 @@ public abstract class extends VectorExpression { public void setOutputColumn(int outputColumn) { this.outputColumn = outputColumn; } + + @Override + public String vectorExpressionParameters() { + return "val " + new String(value, StandardCharsets.UTF_8) + ", col " + + colNum; + } } \ No newline at end of file diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt index 27e083d..7aeff81 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt @@ -171,6 +171,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt index 8b91a4a..f8cb880 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt @@ -132,6 +132,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value.toString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt index 4ac2174..989e2f5 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt @@ -170,6 +170,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt 
ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt index 9382aca..a90b1b2 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt @@ -128,6 +128,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value.toString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt index 5eaa450..ad43cac 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt @@ -161,6 +161,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt index c6c872f..32b49a3 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt @@ -130,6 +130,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value.toString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt index 0fc402d..7267148 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt @@ -138,6 +138,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt index e0ae206..2be05f3 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt @@ -129,6 +129,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git 
ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt index f9fc425..2710fa4 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt @@ -143,6 +143,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt index 90701ec..32647f2 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt @@ -133,6 +133,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value.toString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt index f958be8..dea4db2 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt @@ -153,6 +153,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value.toString() + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt index 585027a..e82b9e2 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt @@ -152,6 +152,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value.toString() + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt index 996c86a..0d8a26b 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt @@ -143,6 +143,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value.toString() + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return 
(new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt index 6506c93..ec0a395 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt @@ -135,6 +135,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value.toString() + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TruncStringScalarCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TruncStringScalarCompareStringGroupColumn.txt index a9a3b6d..26da73a 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TruncStringScalarCompareStringGroupColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TruncStringScalarCompareStringGroupColumn.txt @@ -20,6 +20,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.; +import java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.common.type.; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt index d153fd6..4393c3b 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt @@ -85,6 +85,12 @@ public class extends VectorAggregateExpression { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private Object[] partialResult; transient private LongWritable resultCount; transient private DoubleWritable resultSum; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt index 46d66bd..7468c2f 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt @@ -77,6 +77,12 @@ public class extends VectorAggregateExpression { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + private transient VectorExpressionWriter resultWriter; public (VectorExpression inputExpression) { diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt index b532e2f..57b7ea5 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt @@ -82,6 +82,12 @@ public class extends VectorAggregateExpression { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + private transient VectorExpressionWriter resultWriter; public (VectorExpression inputExpression) { diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt index 3cdf7e2..749e97e 100644 --- 
ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt @@ -81,6 +81,12 @@ public class extends VectorAggregateExpression { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + private transient VectorExpressionWriter resultWriter; public (VectorExpression inputExpression) { diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt index cdce457..9dfc147 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt @@ -93,6 +93,12 @@ public class extends VectorAggregateExpression { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private Text result; public (VectorExpression inputExpression) { diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt index 7e34965..32ecb34 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt @@ -83,6 +83,12 @@ public class extends VectorAggregateExpression { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + private transient VectorExpressionWriter resultWriter; public (VectorExpression inputExpression) { diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt index cc7e54d..bd0f14d 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt @@ -78,6 +78,12 @@ public class extends VectorAggregateExpression { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private final result; public (VectorExpression inputExpression) { diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt index c6c9c52..dc9d4b1 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt @@ -84,6 +84,12 @@ public class extends VectorAggregateExpression { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private LongWritable resultCount; transient private DoubleWritable resultSum; transient private DoubleWritable resultVariance; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt index 8fc94ba..01062a9 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt @@ -111,6 +111,12 @@ public class extends VectorAggregateExpression { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private LongWritable resultCount; transient private DoubleWritable resultSum; transient private DoubleWritable resultVariance; diff --git 
ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java index b8a4693..74cec3e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java @@ -20,6 +20,8 @@ import static org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME; +import org.apache.commons.lang3.tuple.ImmutablePair; + import java.io.OutputStream; import java.io.PrintStream; import java.io.Serializable; @@ -35,30 +37,68 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Stack; import java.util.Map.Entry; import java.util.Set; import java.util.TreeMap; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.common.jsonexplain.JsonParser; import org.apache.hadoop.hive.common.jsonexplain.JsonParserFactory; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.Validator.StringSet; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.DriverContext; +import org.apache.hadoop.hive.ql.exec.spark.SparkTask; +import org.apache.hadoop.hive.ql.exec.tez.TezTask; +import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; +import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger; +import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; +import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.VectorizationDetailLevel; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.ExplainWork; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.HiveOperation; +import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.apache.hadoop.hive.ql.plan.MapWork; +import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.SparkWork; +import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TezWork; +import org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo; +import 
org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.ql.security.authorization.AuthorizationFactory; import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hive.common.util.AnnotationUtils; @@ -157,6 +197,54 @@ public JSONObject getJSONLogicalPlan(PrintStream out, ExplainWork work) throws E return outJSONObject; } + private static String trueCondNameVectorizationEnabled = + HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname + " IS true"; + private static String falseCondNameVectorizationEnabled = + HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname + " IS false"; + + private ImmutablePair outputPlanVectorization(PrintStream out, boolean jsonOutput) + throws Exception { + + if (out != null) { + out.println("PLAN VECTORIZATION:"); + } + + JSONObject json = jsonOutput ? new JSONObject(new LinkedHashMap<>()) : null; + + HiveConf hiveConf = queryState.getConf(); + + boolean isVectorizationEnabled = HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED); + String isVectorizationEnabledCondName = + (isVectorizationEnabled ? + trueCondNameVectorizationEnabled : + falseCondNameVectorizationEnabled); + List isVectorizationEnabledCondList = Arrays.asList(isVectorizationEnabledCondName); + + if (out != null) { + out.print(indentString(2)); + out.print("enabled: "); + out.println(isVectorizationEnabled); + out.print(indentString(2)); + if (!isVectorizationEnabled) { + out.print("enabledConditionsNotMet: "); + } else { + out.print("enabledConditionsMet: "); + } + out.println(isVectorizationEnabledCondList); + } + if (jsonOutput) { + json.put("enabled", isVectorizationEnabled); + if (!isVectorizationEnabled) { + json.put("enabledConditionsNotMet", isVectorizationEnabledCondList); + } else { + json.put("enabledConditionsMet", isVectorizationEnabledCondList); + } + } + + return new ImmutablePair(isVectorizationEnabled, jsonOutput ? json : null); + } + public JSONObject getJSONPlan(PrintStream out, ExplainWork work) throws Exception { return getJSONPlan(out, work.getRootTasks(), work.getFetchTask(), @@ -184,26 +272,46 @@ public JSONObject getJSONPlan(PrintStream out, List> tasks, Task fetc ordered.add(fetchTask); } - JSONObject jsonDependencies = outputDependencies(out, jsonOutput, appendTaskType, ordered); + boolean suppressOthersForVectorization = false; + if (this.work != null && this.work.isVectorization()) { + ImmutablePair planVecPair = outputPlanVectorization(out, jsonOutput); + + if (this.work.isVectorizationOnly()) { + // Suppress the STAGES if vectorization is off. 
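For reference, the console branch of outputPlanVectorization() above renders a header like the following at the top of an EXPLAIN VECTORIZATION result. This is a hedged reconstruction from the print calls and the two condition-name constants, shown for the enabled case:

PLAN VECTORIZATION:
  enabled: true
  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]

The JSON branch emits the same enabled flag and conditions list under a "PLAN VECTORIZATION" key.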
+ suppressOthersForVectorization = !planVecPair.left; + } - if (out != null) { - out.println(); + if (out != null) { + out.println(); + } + + if (jsonOutput) { + outJSONObject.put("PLAN VECTORIZATION", planVecPair.right); + } } - if (jsonOutput) { - outJSONObject.put("STAGE DEPENDENCIES", jsonDependencies); - } + if (!suppressOthersForVectorization) { + JSONObject jsonDependencies = outputDependencies(out, jsonOutput, appendTaskType, ordered); - // Go over all the tasks and dump out the plans - JSONObject jsonPlan = outputStagePlans(out, ordered, - jsonOutput, isExtended); + if (out != null) { + out.println(); + } - if (jsonOutput) { - outJSONObject.put("STAGE PLANS", jsonPlan); - } + if (jsonOutput) { + outJSONObject.put("STAGE DEPENDENCIES", jsonDependencies); + } - if (fetchTask != null) { - fetchTask.setParentTasks(null); + // Go over all the tasks and dump out the plans + JSONObject jsonPlan = outputStagePlans(out, ordered, + jsonOutput, isExtended); + + if (jsonOutput) { + outJSONObject.put("STAGE PLANS", jsonPlan); + } + + if (fetchTask != null) { + fetchTask.setParentTasks(null); + } } return jsonOutput ? outJSONObject : null; @@ -602,6 +710,64 @@ private JSONObject outputPlan(Object work, PrintStream out, } } if (invokeFlag) { + Vectorization vectorization = xpl_note.vectorization(); + if (this.work != null && this.work.isVectorization()) { + + // The EXPLAIN VECTORIZATION option was specified. + final boolean desireOnly = this.work.isVectorizationOnly(); + final VectorizationDetailLevel desiredVecDetailLevel = + this.work.isVectorizationDetailLevel(); + + switch (vectorization) { + case NON_VECTORIZED: + // Display all non-vectorized leaf objects unless ONLY. + if (desireOnly) { + invokeFlag = false; + } + break; + case SUMMARY: + case OPERATOR: + case EXPRESSION: + case DETAIL: + if (vectorization.rank < desiredVecDetailLevel.rank) { + // This detail not desired. + invokeFlag = false; + } + break; + case SUMMARY_PATH: + case OPERATOR_PATH: + if (desireOnly) { + if (vectorization.rank < desiredVecDetailLevel.rank) { + // Suppress headers and all objects below. + invokeFlag = false; + } + } + break; + default: + throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); + } + } else { + // Do not display vectorization objects. + switch (vectorization) { + case SUMMARY: + case OPERATOR: + case EXPRESSION: + case DETAIL: + invokeFlag = false; + break; + case NON_VECTORIZED: + // No action. + break; + case SUMMARY_PATH: + case OPERATOR_PATH: + // Always include headers since they contain non-vectorized objects, too. + break; + default: + throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); + } + } + } + if (invokeFlag) { keyJSONObject = xpl_note.displayName(); if (out != null) { out.print(indentString(indent)); @@ -675,6 +841,64 @@ private JSONObject outputPlan(Object work, PrintStream out, } } if (invokeFlag) { + Vectorization vectorization = xpl_note.vectorization(); + if (this.work != null && this.work.isVectorization()) { + + // The EXPLAIN VECTORIZATION option was specified. + final boolean desireOnly = this.work.isVectorizationOnly(); + final VectorizationDetailLevel desiredVecDetailLevel = + this.work.isVectorizationDetailLevel(); + + switch (vectorization) { + case NON_VECTORIZED: + // Display all non-vectorized leaf objects unless ONLY. 
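The detail-level switch above (it reappears below for the second outputPlan() overload) gates every @Explain-annotated element against the requested EXPLAIN VECTORIZATION level. A standalone restatement of the core rank rule for the SUMMARY through DETAIL cases, under the assumption implied by the comparison that finer detail levels carry lower rank values; the numeric ranks here are illustrative:

// Sketch, not ExplainTask code: an element is displayed only when its
// annotation rank is at least the requested level's rank.
class VectorizationDetailGate {
  enum DetailRank {
    SUMMARY(4), OPERATOR(3), EXPRESSION(2), DETAIL(1);
    final int rank;
    DetailRank(int rank) { this.rank = rank; }
  }
  static boolean displayed(DetailRank annotation, DetailRank requested) {
    // e.g. EXPRESSION (rank 2) output is suppressed under SUMMARY (rank 4).
    return annotation.rank >= requested.rank;
  }
}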
+ if (desireOnly) { + invokeFlag = false; + } + break; + case SUMMARY: + case OPERATOR: + case EXPRESSION: + case DETAIL: + if (vectorization.rank < desiredVecDetailLevel.rank) { + // This detail not desired. + invokeFlag = false; + } + break; + case SUMMARY_PATH: + case OPERATOR_PATH: + if (desireOnly) { + if (vectorization.rank < desiredVecDetailLevel.rank) { + // Suppress headers and all objects below. + invokeFlag = false; + } + } + break; + default: + throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); + } + } else { + // Do not display vectorization objects. + switch (vectorization) { + case SUMMARY: + case OPERATOR: + case EXPRESSION: + case DETAIL: + invokeFlag = false; + break; + case NON_VECTORIZED: + // No action. + break; + case SUMMARY_PATH: + case OPERATOR_PATH: + // Always include headers since they contain non-vectorized objects, too. + break; + default: + throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); + } + } + } + if (invokeFlag) { Object val = null; try { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java index 038b96c..af1fa66 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java @@ -41,6 +41,8 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.optimizer.spark.SparkPartitionPruningSinkDesc; import org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator; +import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc; +import org.apache.hadoop.hive.ql.plan.AbstractVectorDesc; import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc; import org.apache.hadoop.hive.ql.plan.CollectDesc; import org.apache.hadoop.hive.ql.plan.CommonMergeJoinDesc; @@ -73,6 +75,7 @@ import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.ql.plan.UDTFDesc; import org.apache.hadoop.hive.ql.plan.UnionDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; import com.google.common.base.Preconditions; @@ -142,6 +145,8 @@ Class> opClass, CompilationOpContext cContext, T conf, VectorizationContext vContext) throws HiveException { try { + VectorDesc vectorDesc = ((AbstractOperatorDesc) conf).getVectorDesc(); + vectorDesc.setVectorOp(opClass); Operator op = (Operator) opClass.getDeclaredConstructor( CompilationOpContext.class, VectorizationContext.class, OperatorDesc.class) .newInstance(cContext, vContext, conf); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java index a30c771..94af097 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java @@ -201,5 +201,4 @@ public boolean isIdentitySelect() { return true; } - } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java index 1cd9021..63e4802 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java @@ -93,9 +93,7 @@ public static boolean isSupportedField(ObjectInspector foi) { return true; } - public static boolean isSupportedField(String typeName) { - TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); - + public static boolean isSupportedField(TypeInfo typeInfo) { 
if (typeInfo.getCategory() != Category.PRIMITIVE) return false; // not supported PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; PrimitiveCategory pc = primitiveTypeInfo.getPrimitiveCategory(); @@ -103,6 +101,11 @@ public static boolean isSupportedField(String typeName) { return true; } + public static boolean isSupportedField(String typeName) { + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + return isSupportedField(typeInfo); + } + public static MapJoinKey readFromVector(Output output, MapJoinKey key, Object[] keyObject, List keyOIs, boolean mayReuseKey) throws HiveException { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java index 1634f42..3cf6561 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java @@ -75,7 +75,7 @@ public void init(ExecMapperContext context, MapredContext mrContext, Configurati this.desc = joinOp.getConf(); if (desc.getVectorMode() && HiveConf.getBoolVar( hconf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED)) { - VectorMapJoinDesc vectorDesc = desc.getVectorDesc(); + VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc(); useFastContainer = vectorDesc != null && vectorDesc.hashTableImplementationType() == VectorMapJoinDesc.HashTableImplementationType.FAST; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnMapping.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnMapping.java index c4b95c3..c890674 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnMapping.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnMapping.java @@ -20,6 +20,8 @@ import java.util.Arrays; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + /** * This class collects column information for copying a row from one VectorizedRowBatch to * same/another batch. 
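The MapJoinKey change above is a pure refactor: the TypeInfo-based check becomes the primary overload and the String version parses the type name and delegates. A hedged usage sketch (the type name "int" is an arbitrary example):

import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

class MapJoinKeySupportCheck {
  static void demo() {
    // Callers that already hold a TypeInfo now skip the string re-parse.
    TypeInfo ti = TypeInfoUtils.getTypeInfoFromTypeString("int");
    boolean direct = MapJoinKey.isSupportedField(ti);
    // The String overload parses once and delegates, so both agree.
    boolean viaName = MapJoinKey.isSupportedField("int");
    assert direct == viaName;
  }
}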
@@ -30,7 +32,7 @@ protected int[] sourceColumns; protected int[] outputColumns; - protected String[] typeNames; + protected TypeInfo[] typeInfos; protected VectorColumnOrderedMap vectorColumnMapping; @@ -38,7 +40,7 @@ public VectorColumnMapping(String name) { this.vectorColumnMapping = new VectorColumnOrderedMap(name); } - public abstract void add(int sourceColumn, int outputColumn, String typeName); + public abstract void add(int sourceColumn, int outputColumn, TypeInfo typeInfo); public abstract void finalize(); @@ -54,8 +56,8 @@ public int getCount() { return outputColumns; } - public String[] getTypeNames() { - return typeNames; + public TypeInfo[] getTypeInfos() { + return typeInfos; } @Override @@ -65,7 +67,7 @@ public String toString() { sb.append(", "); sb.append("output columns: " + Arrays.toString(outputColumns)); sb.append(", "); - sb.append("type names: " + Arrays.toString(typeNames)); + sb.append("type infos: " + Arrays.toString(typeInfos)); return sb.toString(); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOrderedMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOrderedMap.java index 0e6014b..97d55f5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOrderedMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOrderedMap.java @@ -23,8 +23,10 @@ import java.util.TreeMap; import org.apache.commons.lang.ArrayUtils; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** * This class collects column information for mapping vector columns, including the hive type name. @@ -43,17 +45,17 @@ private class Value { int valueColumn; - String typeName; + TypeInfo typeInfo; - Value(int valueColumn, String typeName) { + Value(int valueColumn, TypeInfo typeInfo) { this.valueColumn = valueColumn; - this.typeName = typeName; + this.typeInfo = typeInfo; } public String toString() { StringBuilder sb = new StringBuilder(); sb.append("(value column: " + valueColumn); - sb.append(", type name: " + typeName + ")"); + sb.append(", type info: " + typeInfo.toString() + ")"); return sb.toString(); } } @@ -62,12 +64,12 @@ public String toString() { private final int[] orderedColumns; private final int[] valueColumns; - private final String[] typeNames; + private final TypeInfo[] typeInfos; - Mapping(int[] orderedColumns, int[] valueColumns, String[] typeNames) { + Mapping(int[] orderedColumns, int[] valueColumns, TypeInfo[] typeInfos) { this.orderedColumns = orderedColumns; this.valueColumns = valueColumns; - this.typeNames = typeNames; + this.typeInfos = typeInfos; } public int getCount() { @@ -82,8 +84,8 @@ public int getCount() { return valueColumns; } - public String[] getTypeNames() { - return typeNames; + public TypeInfo[] getTypeInfos() { + return typeInfos; } } @@ -92,14 +94,14 @@ public VectorColumnOrderedMap(String name) { orderedTreeMap = new TreeMap(); } - public void add(int orderedColumn, int valueColumn, String typeName) { + public void add(int orderedColumn, int valueColumn, TypeInfo typeInfo) { if (orderedTreeMap.containsKey(orderedColumn)) { throw new RuntimeException( name + " duplicate column " + orderedColumn + " in ordered column map " + orderedTreeMap.toString() + - " when adding value column " + valueColumn + ", type " + typeName); + " when adding value column " + valueColumn + ", type info " + typeInfo.toString()); } - orderedTreeMap.put(orderedColumn, new Value(valueColumn, typeName)); + orderedTreeMap.put(orderedColumn,
new Value(valueColumn, typeInfo)); } public boolean orderedColumnsContain(int orderedColumn) { @@ -109,17 +111,16 @@ public boolean orderedColumnsContain(int orderedColumn) { public Mapping getMapping() { ArrayList orderedColumns = new ArrayList(); ArrayList valueColumns = new ArrayList(); - ArrayList typeNames = new ArrayList(); + ArrayList typeInfos = new ArrayList(); for (Map.Entry entry : orderedTreeMap.entrySet()) { orderedColumns.add(entry.getKey()); Value value = entry.getValue(); valueColumns.add(value.valueColumn); - typeNames.add(value.typeName); + typeInfos.add(value.typeInfo); } return new Mapping( ArrayUtils.toPrimitive(orderedColumns.toArray(new Integer[0])), ArrayUtils.toPrimitive(valueColumns.toArray(new Integer[0])), - typeNames.toArray(new String[0])); - + typeInfos.toArray(new TypeInfo[0])); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOutputMapping.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOutputMapping.java index f35aff7..4ceff6b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOutputMapping.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOutputMapping.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector; import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOrderedMap.Mapping; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** * This class collects column information for copying a row from one VectorizedRowBatch to @@ -35,9 +36,9 @@ public VectorColumnOutputMapping(String name) { } @Override - public void add(int sourceColumn, int outputColumn, String typeName) { + public void add(int sourceColumn, int outputColumn, TypeInfo typeInfo) { // Order on outputColumn. - vectorColumnMapping.add(outputColumn, sourceColumn, typeName); + vectorColumnMapping.add(outputColumn, sourceColumn, typeInfo); } public boolean containsOutputColumn(int outputColumn) { @@ -51,7 +52,7 @@ public void finalize() { // Ordered columns are the output columns. sourceColumns = mapping.getValueColumns(); outputColumns = mapping.getOrderedColumns(); - typeNames = mapping.getTypeNames(); + typeInfos = mapping.getTypeInfos(); // Not needed anymore. vectorColumnMapping = null; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java index 4f5ba9a..061e396 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector; import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOrderedMap.Mapping; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** * This class collects column information for copying a row from one VectorizedRowBatch to @@ -35,9 +36,9 @@ public VectorColumnSourceMapping(String name) { } @Override - public void add(int sourceColumn, int outputColumn, String typeName) { + public void add(int sourceColumn, int outputColumn, TypeInfo typeInfo) { // Order on sourceColumn. - vectorColumnMapping.add(sourceColumn, outputColumn, typeName); + vectorColumnMapping.add(sourceColumn, outputColumn, typeInfo); } @Override @@ -47,7 +48,7 @@ public void finalize() { // Ordered columns are the source columns. 
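VectorColumnOutputMapping and VectorColumnSourceMapping differ only in which index keys the ordered tree map, as the two add() overrides show. A hedged usage sketch of the output-ordered variant (column numbers and types are made up):

import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

class ColumnMappingDemo {
  static void demo() {
    VectorColumnOutputMapping byOutput = new VectorColumnOutputMapping("demo");
    byOutput.add(/* sourceColumn */ 7, /* outputColumn */ 1, TypeInfoFactory.longTypeInfo);
    byOutput.add(/* sourceColumn */ 3, /* outputColumn */ 0, TypeInfoFactory.stringTypeInfo);
    byOutput.finalize();
    // After finalize(): outputColumns == {0, 1}, sourceColumns == {3, 7}, and
    // getTypeInfos() == {string, bigint} as TypeInfo objects, so consumers such
    // as VectorCopyRow no longer re-parse type name strings.
  }
}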
sourceColumns = mapping.getOrderedColumns(); outputColumns = mapping.getValueColumns(); - typeNames = mapping.getTypeNames(); + typeInfos = mapping.getTypeInfos(); // Not needed anymore. vectorColumnMapping = null; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java index c8e0284..911aeb0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java @@ -262,8 +262,7 @@ public void init(VectorColumnMapping columnMapping) throws HiveException { for (int i = 0; i < count; i++) { int inputColumn = columnMapping.getInputColumns()[i]; int outputColumn = columnMapping.getOutputColumns()[i]; - String typeName = columnMapping.getTypeNames()[i].toLowerCase(); - TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + TypeInfo typeInfo = columnMapping.getTypeInfos()[i]; Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); CopyRow copyRowByValue = null; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java index 2598445..fd885a9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorFilterDesc; import com.google.common.annotations.VisibleForTesting; @@ -50,9 +51,8 @@ public VectorFilterOperator(CompilationOpContext ctx, VectorizationContext vContext, OperatorDesc conf) throws HiveException { this(ctx); - ExprNodeDesc oldExpression = ((FilterDesc) conf).getPredicate(); - conditionEvaluator = vContext.getVectorExpression(oldExpression, VectorExpressionDescriptor.Mode.FILTER); this.conf = (FilterDesc) conf; + conditionEvaluator = ((VectorFilterDesc) this.conf.getVectorDesc()).getPredicateExpression(); } /** Kryo ctor. */ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 2605203..fef7c2a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.ql.util.JavaDataModel; @@ -65,6 +66,8 @@ private static final Logger LOG = LoggerFactory.getLogger( VectorGroupByOperator.class.getName()); + private VectorGroupByDesc vectorDesc; + /** * This is the vector of aggregators. They are stateless and only implement * the algorithm of how to compute the aggregation. 
state is kept in the @@ -756,16 +759,10 @@ public VectorGroupByOperator(CompilationOpContext ctx, this(ctx); GroupByDesc desc = (GroupByDesc) conf; this.conf = desc; - List keysDesc = desc.getKeys(); - keyExpressions = vContext.getVectorExpressions(keysDesc); - ArrayList aggrDesc = desc.getAggregators(); - aggregators = new VectorAggregateExpression[aggrDesc.size()]; - for (int i = 0; i < aggrDesc.size(); ++i) { - AggregationDesc aggDesc = aggrDesc.get(i); - aggregators[i] = vContext.getAggregatorExpression(aggDesc); - } - - isVectorOutput = desc.getVectorDesc().isVectorOutput(); + vectorDesc = (VectorGroupByDesc) desc.getVectorDesc(); + keyExpressions = vectorDesc.getKeyExpressions(); + aggregators = vectorDesc.getAggregators(); + isVectorOutput = vectorDesc.isVectorOutput(); vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames(), /* vContextEnvironment */ vContext); @@ -834,7 +831,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { forwardCache = new Object[outputKeyLength + aggregators.length]; - switch (conf.getVectorDesc().getProcessingMode()) { + switch (vectorDesc.getProcessingMode()) { case GLOBAL: Preconditions.checkState(outputKeyLength == 0); processingMode = this.new ProcessingModeGlobalAggregate(); @@ -850,7 +847,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { break; default: throw new RuntimeException("Unsupported vector GROUP BY processing mode " + - conf.getVectorDesc().getProcessingMode().name()); + vectorDesc.getProcessingMode().name()); } processingMode.initialize(hconf); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java index bb382b1..5c490ef 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.apache.hadoop.hive.ql.plan.VectorSelectDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -45,9 +46,11 @@ private static final long serialVersionUID = 1L; - protected VectorExpression[] vExpressions = null; + private VectorSelectDesc vectorDesc; - private transient int [] projectedColumns = null; + private VectorExpression[] vExpressions = null; + + private int [] projectedOutputColumns = null; private transient VectorExpressionWriter [] valueWriters = null; @@ -58,13 +61,9 @@ public VectorSelectOperator(CompilationOpContext ctx, VectorizationContext vContext, OperatorDesc conf) throws HiveException { this(ctx); this.conf = (SelectDesc) conf; - List colList = this.conf.getColList(); - vExpressions = new VectorExpression[colList.size()]; - for (int i = 0; i < colList.size(); i++) { - ExprNodeDesc expr = colList.get(i); - VectorExpression ve = vContext.getVectorExpression(expr); - vExpressions[i] = ve; - } + vectorDesc = (VectorSelectDesc) this.conf.getVectorDesc(); + vExpressions = vectorDesc.getSelectExpressions(); + projectedOutputColumns = vectorDesc.getProjectedOutputColumns(); /** * Create a new vectorization context to create a new projection, but keep @@ -73,11 +72,10 @@ public VectorSelectOperator(CompilationOpContext ctx, vOutContext 
= new VectorizationContext(getName(), vContext); vOutContext.resetProjectionColumns(); - for (int i=0; i < colList.size(); ++i) { - String columnName = this.conf.getOutputColumnNames().get(i); - VectorExpression ve = vExpressions[i]; - vOutContext.addProjectionColumn(columnName, - ve.getOutputColumn()); + List outputColumnNames = this.conf.getOutputColumnNames(); + for (int i=0; i < projectedOutputColumns.length; ++i) { + String columnName = outputColumnNames.get(i); + vOutContext.addProjectionColumn(columnName, projectedOutputColumns[i]); } } @@ -110,12 +108,6 @@ protected void initializeOp(Configuration hconf) throws HiveException { List outputFieldNames = conf.getOutputColumnNames(); outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector( outputFieldNames, objectInspectors); - - projectedColumns = new int [vExpressions.length]; - for (int i = 0; i < projectedColumns.length; i++) { - vExpressions[i].init(hconf); - projectedColumns[i] = vExpressions[i].getOutputColumn(); - } } @Override @@ -140,8 +132,8 @@ public void process(Object row, int tag) throws HiveException { // Prepare output, set the projections int[] originalProjections = vrg.projectedColumns; int originalProjectionSize = vrg.projectionSize; - vrg.projectionSize = vExpressions.length; - vrg.projectedColumns = this.projectedColumns; + vrg.projectionSize = projectedOutputColumns.length; + vrg.projectedColumns = this.projectedOutputColumns; forward(vrg, outputObjInspector); // Revert the projected columns back, because vrg will be re-used. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 484f615..4802489 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -94,6 +94,7 @@ import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; @@ -563,7 +564,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, VectorExpress } else { throw new HiveException( "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString() - + " because hive.vectorized.adaptor.usage.mode=chosen " + + " because hive.vectorized.adaptor.usage.mode=chosen" + " and the UDF wasn't one of the chosen ones"); } break; @@ -1286,6 +1287,35 @@ private String getNewInstanceArgumentString(Object [] args) { return "arguments: " + Arrays.toString(args) + ", argument classes: " + argClasses.toString(); } + private static int STACK_LENGTH_LIMIT = 15; + + public static String getStackTraceAsSingleLine(Throwable e) { + StringBuilder sb = new StringBuilder(); + sb.append(e); + sb.append(" stack trace: "); + StackTraceElement[] stackTrace = e.getStackTrace(); + int length = stackTrace.length; + boolean isTruncated = false; + if (length > STACK_LENGTH_LIMIT) { + length = STACK_LENGTH_LIMIT; + isTruncated = true; + } + for (int i = 0; i < length; i++) { + if (i > 0) { + sb.append(", "); + } + sb.append(stackTrace[i]); + } + if (isTruncated) { + sb.append(", ..."); + } + + // Attempt to cleanup stack trace elements that vary by VM. 
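getStackTraceAsSingleLine() above exists so vectorizer diagnostics stay on one stable line: the exception message, then at most STACK_LENGTH_LIMIT (15) comma-separated frames, with ", ..." appended when the trace is longer. A usage sketch:

import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;

class StackTraceLineDemo {
  public static void main(String[] args) {
    try {
      throw new RuntimeException("boom");
    } catch (Exception ex) {
      // Prints something like:
      // java.lang.RuntimeException: boom stack trace: StackTraceLineDemo.main(StackTraceLineDemo.java:6)
      System.out.println(VectorizationContext.getStackTraceAsSingleLine(ex));
    }
  }
}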
+ String cleaned = sb.toString().replaceAll("GeneratedConstructorAccessor[0-9]*", "GeneratedConstructorAccessor"); + + return cleaned; + } + private VectorExpression instantiateExpression(Class vclass, TypeInfo returnType, Object...args) throws HiveException { VectorExpression ve = null; @@ -1297,14 +1327,14 @@ private VectorExpression instantiateExpression(Class vclass, TypeInfo returnT ve = (VectorExpression) ctor.newInstance(); } catch (Exception ex) { throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with 0 arguments, exception: " + - StringUtils.stringifyException(ex)); + getStackTraceAsSingleLine(ex)); } } else if (numParams == argsLength) { try { ve = (VectorExpression) ctor.newInstance(args); } catch (Exception ex) { throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with " + getNewInstanceArgumentString(args) + ", exception: " + - StringUtils.stringifyException(ex)); + getStackTraceAsSingleLine(ex)); } } else if (numParams == argsLength + 1) { // Additional argument is needed, which is the outputcolumn. @@ -1332,7 +1362,7 @@ private VectorExpression instantiateExpression(Class vclass, TypeInfo returnT ve.setOutputType(returnTypeName); } catch (Exception ex) { throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with arguments " + getNewInstanceArgumentString(newArgs) + ", exception: " + - StringUtils.stringifyException(ex)); + getStackTraceAsSingleLine(ex)); } } // Add maxLength parameter to UDFs that have CHAR or VARCHAR output. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java index a403725..914bb1f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java @@ -20,10 +20,10 @@ /** * VectorizationContextRegion optional interface implemented by vectorized operators - * that are changing the vectorizaiton context (region boundary operators) + * that are changing the vectorization context (region boundary operators) */ public interface VectorizationContextRegion { VectorizationContext getOuputVectorizationContext(); -} +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index 3e3844e..e546a65 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -38,7 +38,10 @@ import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.IOPrepareCache; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java index c50af8d..b49ff39 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java @@ -518,6 +518,11 @@ public void setPattern(String pattern) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", pattern " + pattern; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToCharViaLongToChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToCharViaLongToChar.java index 57dc92b..96c08af 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToCharViaLongToChar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToCharViaLongToChar.java @@ -51,4 +51,9 @@ public int getMaxLength() { public void setMaxLength(int maxLength) { this.maxLength = maxLength; } + + @Override + public String vectorExpressionParameters() { + return "col " + inputColumn + ", maxLength " + maxLength; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToVarCharViaLongToVarChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToVarCharViaLongToVarChar.java index 1f7697e..a120f2e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToVarCharViaLongToVarChar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToVarCharViaLongToVarChar.java @@ -51,4 +51,9 @@ public int getMaxLength() { public void setMaxLength(int maxLength) { this.maxLength = maxLength; } + + @Override + public String vectorExpressionParameters() { + return "col " + inputColumn + ", maxLength " + maxLength; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToChar.java index 187f12b..447e258 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToChar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToChar.java @@ -51,4 +51,8 @@ public int getMaxLength() { public void setMaxLength(int maxLength) { this.maxLength = maxLength; } + + public String vectorExpressionParameters() { + return "col " + inputColumn + ", maxLength " + maxLength; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarChar.java index 5ad745c..98c1f93 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarChar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarChar.java @@ -51,4 +51,9 @@ public int getMaxLength() { public void setMaxLength(int maxLength) { this.maxLength = maxLength; } + + @Override + public String vectorExpressionParameters() { + return "col " + inputColumn + ", maxLength " + maxLength; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToChar.java index e753a6e..3bcd989 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToChar.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToChar.java @@ -55,4 +55,9 @@ public int getMaxLength() { public void setMaxLength(int maxLength) { this.maxLength = maxLength; } + + @Override + public String vectorExpressionParameters() { + return "col " + inputColumn + ", maxLength " + maxLength; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java index ea235d9..e1debcd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java @@ -148,6 +148,11 @@ public void setInputColumn(int inputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + inputColumn; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToVarChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToVarChar.java index 3a2c2d0..3b4f05b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToVarChar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToVarChar.java @@ -55,4 +55,9 @@ public int getMaxLength() { public void setMaxLength(int maxLength) { this.maxLength = maxLength; } + + @Override + public String vectorExpressionParameters() { + return "col " + inputColumn + ", maxLength " + maxLength; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java index 07f94f5..e38e32b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java @@ -114,6 +114,11 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToChar.java index 27674c4..eac45e4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToChar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToChar.java @@ -51,4 +51,9 @@ public int getMaxLength() { public void setMaxLength(int maxLength) { this.maxLength = maxLength; } + + @Override + public String vectorExpressionParameters() { + return "col " + inputColumn + ", maxLength " + maxLength; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java index ceefd61..86e0959 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java @@ -94,6 +94,11 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + return 
"col " + inputColumn; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java index 4de95a5..9f71b9a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java @@ -111,6 +111,11 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToVarChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToVarChar.java index 7c3dca2..9bc1cdb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToVarChar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToVarChar.java @@ -51,4 +51,9 @@ public int getMaxLength() { public void setMaxLength(int maxLength) { this.maxLength = maxLength; } + + @Override + public String vectorExpressionParameters() { + return "col " + inputColumn + ", maxLength " + maxLength; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java index b1c6b2d..4cc120a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java @@ -112,6 +112,11 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToChar.java index 7c06ff5..3469183 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToChar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToChar.java @@ -52,4 +52,8 @@ public int getMaxLength() { public void setMaxLength(int maxLength) { this.maxLength = maxLength; } + + public String vectorExpressionParameters() { + return "col " + inputColumn + ", maxLength " + maxLength; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToVarChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToVarChar.java index 376ce92..fd4c76a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToVarChar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToVarChar.java @@ -52,4 +52,9 @@ public int getMaxLength() { public void setMaxLength(int maxLength) { this.maxLength = maxLength; } + + @Override + public String vectorExpressionParameters() { + return "col " + inputColumn + ", maxLength " + 
maxLength; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java index e456b12..4b176ae 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java @@ -149,6 +149,11 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + return "col " + inputColumn; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java index 504b354..074f9aa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java @@ -154,6 +154,11 @@ public void setInputColumn(int inputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + inputColumn; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java index c8844c8..e577628 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java @@ -148,6 +148,11 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + return "col " + inputColumn; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java index 62f3dc9..21b034a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java @@ -146,6 +146,11 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + return "col " + inputColumn; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java index b8a58cd..0e23bfb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java @@ -21,8 +21,6 @@ import 
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; import org.apache.hadoop.hive.ql.exec.vector.*; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; public class CastTimestampToBoolean extends VectorExpression { private static final long serialVersionUID = 1L; @@ -125,6 +123,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java index a955d79..92595d9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java @@ -118,6 +118,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java index ba2e823..466043e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java @@ -21,8 +21,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; import org.apache.hadoop.hive.ql.exec.vector.*; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; public class CastTimestampToLong extends VectorExpression { private static final long serialVersionUID = 1L; @@ -120,6 +118,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java index ff7371d..42f9b60 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java @@ -314,6 +314,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java index 60ed2d4..297c372 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java @@ -317,6 +317,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java index 72749b7..487c4b0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.nio.charset.StandardCharsets; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveDecimal; @@ -268,6 +269,38 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + String value; + if (isNullValue) { + value = "null"; + } else { + switch (type) { + case LONG: + value = Long.toString(longValue); + break; + case DOUBLE: + value = Double.toString(doubleValue); + break; + case BYTES: + value = new String(bytesValue, StandardCharsets.UTF_8); + break; + case DECIMAL: + value = decimalValue.toString(); + break; + case TIMESTAMP: + value = timestampValue.toString(); + break; + case INTERVAL_DAY_TIME: + value = intervalDayTimeValue.toString(); + break; + default: + throw new RuntimeException("Unknown vector column type " + type); + } + } + return "val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()).build(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java index fafacce..e04280f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java @@ -168,6 +168,10 @@ public String getOutputType() { return "timestamp"; } + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + colNum2; + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java index a9ca93c..bce24ea 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java @@ -132,6 +132,11 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java index 59cf9da..62f29f1 100644 
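
The ConstantVectorExpression change above renders the constant according to its vector column type and throws a RuntimeException for unknown types. A reduced sketch of that dispatch, with a hypothetical ConstType enum standing in for Hive's vector column type (only three representative cases shown; not part of the patch):

import java.nio.charset.StandardCharsets;

final class ConstantDisplay {
  // Hypothetical stand-in for the vector column type enum used by the patch.
  enum ConstType { LONG, DOUBLE, BYTES }

  static String render(ConstType type, Object value, boolean isNull) {
    if (isNull) {
      return "val null";   // null constants print uniformly, regardless of type
    }
    switch (type) {
      case LONG:
      case DOUBLE:
        return "val " + value;   // boxed numerics print via toString
      case BYTES:
        // byte[] constants are decoded as UTF-8, matching the patch.
        return "val " + new String((byte[]) value, StandardCharsets.UTF_8);
      default:
        throw new RuntimeException("Unknown vector column type " + type);
    }
  }
}

Under these assumptions, render(ConstType.BYTES, "hello".getBytes(StandardCharsets.UTF_8), false) yields "val hello", the same "val ..." shape used throughout these hunks.
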
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java @@ -135,6 +135,11 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java index d4d8fea..9a42f50 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import java.util.Arrays; import java.util.HashSet; /** @@ -155,4 +156,10 @@ public Descriptor getDescriptor() { public void setInListValues(HiveDecimal[] a) { this.inListValues = a; } + + @Override + public String vectorExpressionParameters() { + return "col " + inputCol + ", values " + Arrays.toString(inListValues); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java index 4b1182c..a9e1f8b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java @@ -134,6 +134,11 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + return "col " + inputColumn; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java index dfc1aff..db65460 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -156,6 +158,11 @@ public void setInListValues(double[] a) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", values " + Arrays.toString(inListValues); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { // return null since this will be handled as a special case in VectorizationContext diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterColAndScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterColAndScalar.java index bef1c18..578feb0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterColAndScalar.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterColAndScalar.java @@ -73,6 +73,11 @@ public void setValue(long value) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterColOrScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterColOrScalar.java index ee0ac69..72f58b1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterColOrScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterColOrScalar.java @@ -73,6 +73,11 @@ public void setValue(long value) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimalColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimalColumnInList.java index 79d3fe3..48f4a93 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimalColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimalColumnInList.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import java.util.Arrays; import java.util.HashSet; /** @@ -171,4 +172,10 @@ public Descriptor getDescriptor() { public void setInListValues(HiveDecimal[] a) { this.inListValues = a; } + + @Override + public String vectorExpressionParameters() { + return "col " + inputCol + ", values " + Arrays.toString(inListValues); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java index 05dcb43..0252236 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java @@ -177,4 +177,10 @@ public Descriptor getDescriptor() { public void setInListValues(double [] a) { this.inListValues = a; } + + @Override + public String vectorExpressionParameters() { + return "col " + inputCol + ", values " + Arrays.toString(inListValues); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java index 41e3b0f..175b497 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -51,6 +53,12 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + // The children are input.
+ return null; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { // IMPORTANT NOTE: For Multi-AND, the VectorizationContext class will catch cases with 3 or diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java index dc5139d..5ed1ed8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java @@ -232,6 +232,12 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + // The children are input. + return null; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { // IMPORTANT NOTE: For Multi-OR, the VectorizationContext class will catch cases with 3 or diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java index a40f39c..dce1b43 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java @@ -177,4 +177,11 @@ public Descriptor getDescriptor() { public void setInListValues(long [] a) { this.inListValues = a; } + + @Override + public String vectorExpressionParameters() { + return "col " + inputCol + ", values " + Arrays.toString(inListValues); + } + + } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterScalarAndColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterScalarAndColumn.java index bcc8f89..7092f4b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterScalarAndColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterScalarAndColumn.java @@ -73,6 +73,11 @@ public void setValue(long value) { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterScalarOrColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterScalarOrColumn.java index f515e60..ab242ae 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterScalarOrColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterScalarOrColumn.java @@ -73,6 +73,11 @@ public void setValue(long value) { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java index e34ec75..72ec912 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import 
org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -179,4 +181,40 @@ public Descriptor getDescriptor() { public void setInListValues(byte [][] a) { this.inListValues = a; } + + @Override + public String vectorExpressionParameters() { + StringBuilder sb = new StringBuilder(); + sb.append("col "); + sb.append(inputCol); + sb.append(", values "); + boolean isFirst = true; + for (byte[] bytes : inListValues) { + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append(displayBytes(bytes, 0, bytes.length)); + } + return sb.toString(); + } + + public static boolean isDisplayableAscii(byte b) + { + return (b > 32) && (b < 127); + } + + public static String displayBytes(byte[] bytes, int start, int length) { + StringBuilder sb = new StringBuilder(); + for (int i = start; i < start + length; i++) { + byte b = bytes[i]; + if (isDisplayableAscii(b)) { + sb.append((char) b); + } else { + sb.append(String.format("\\%03d", (int) (b & 0xff))); + } + } + return sb.toString(); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java index 1e21fea..8b873f3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; import java.util.List; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -176,4 +177,12 @@ public void setStructColumnExprs(VectorizationContext vContext, } this.fieldVectorColumnTypes = fieldVectorColumnTypes; } + + @Override + public String vectorExpressionParameters() { + return "structExpressions " + Arrays.toString(structExpressions) + + ", fieldVectorColumnTypes " + Arrays.toString(fieldVectorColumnTypes) + + ", structColumnMap " + Arrays.toString(structColumnMap); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterTimestampColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterTimestampColumnInList.java index 25a276a..a7666bc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterTimestampColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterTimestampColumnInList.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.sql.Timestamp; +import java.util.Arrays; import java.util.HashSet; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; @@ -169,4 +170,10 @@ public Descriptor getDescriptor() { public void setInListValues(Timestamp[] a) { this.inListValues = a; } + + @Override + public String vectorExpressionParameters() { + return "col " + inputCol + ", values " + Arrays.toString(inListValues); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java index cc6afa5..76fdeb5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java @@ -134,6 +134,11 @@ public String getOutputType() { }
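
The displayBytes/isDisplayableAscii pair above fixes the convention for printing byte-array IN-list values: printable ASCII (excluding space, 32) is emitted as the character itself, anything else as a backslash-escaped three-digit decimal. A self-contained check of that convention, with an illustrative class name and sample input (not part of the patch):

public class DisplayBytesDemo {
  static boolean isDisplayableAscii(byte b) {
    return (b > 32) && (b < 127);
  }

  static String displayBytes(byte[] bytes) {
    StringBuilder sb = new StringBuilder();
    for (byte b : bytes) {
      if (isDisplayableAscii(b)) {
        sb.append((char) b);                            // printable: keep the character
      } else {
        sb.append(String.format("\\%03d", b & 0xff));   // otherwise: \DDD decimal escape
      }
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    // Prints: abc\000\032\171  (NUL, space, and 0xAB are escaped)
    System.out.println(displayBytes(new byte[] { 'a', 'b', 'c', 0, ' ', (byte) 0xab }));
  }
}

Escaping rather than decoding avoids charset guesses for arbitrary binary constants; UTF-8 decoding is reserved for values known to be strings (compare ConstantVectorExpression earlier in this patch).
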
@Override + public String vectorExpressionParameters() { + return "col " + inputColumn; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java index 7f005a1..8dbb7b9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java @@ -131,6 +131,11 @@ public int getOutputColumn() { } @Override + public String vectorExpressionParameters() { + return "col " + inputColumn; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java index 561c152..569d7f7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java @@ -122,6 +122,11 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + return "col " + inputColumn; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java index 0120c0a..1b3127c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java @@ -130,6 +130,10 @@ public void setInputColumn(int inputColumn) { this.inputColumn = inputColumn; } + public String vectorExpressionParameters() { + return "col " + inputColumn; + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java index b73e851..b527482 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java @@ -130,6 +130,10 @@ public void setInputColumn(int inputColumn) { this.inputColumn = inputColumn; } + public String vectorExpressionParameters() { + return "col " + inputColumn; + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java index fa0a746..db45ed4 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java @@ -143,6 +143,10 @@ public String getOutputType() { return "String"; } + @Override + public String vectorExpressionParameters() { + return "col " + inputCol; + } @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java index a18bb55..9eead7b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java @@ -120,6 +120,10 @@ public String getOutputType() { return outputType; } + public String vectorExpressionParameters() { + return "col " + colNum + ", decimalPlaces " + decimalPlaces; + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java index 774551c..5f4e83a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java @@ -131,6 +131,11 @@ public void setInputColumn(int inputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + inputColumn; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java index b84d9be..b652226 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java @@ -132,6 +132,11 @@ public void setInputColumn(int inputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + inputColumn; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IdentityExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IdentityExpression.java index 402d0f8..2385a40 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IdentityExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IdentityExpression.java @@ -81,6 +81,11 @@ public void setType(String type) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()).build(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java index f0f4f6d..514b453 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java @@ -167,6 +167,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + arg1Column + ", col "+ arg2Column + ", col "+ arg3Column; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java index 804923e..98fa29e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java @@ -136,6 +136,11 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + return "col " + arg1Column + ", col "+ arg2Column + ", col "+ arg3Column; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java index 8face7d..9dc3669 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java @@ -130,6 +130,11 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + return "col " + arg1Column + ", col "+ arg2Column + ", val "+ arg3Scalar.toString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java index 40f2e08..4d4649f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java @@ -132,6 +132,11 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + return "col " + arg1Column + ", val "+ arg2Scalar + ", col "+ arg3Column; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java index 43676dd..c8f3294 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java @@ -120,6 +120,11 @@ public String getOutputType() { } @Override + 
public String vectorExpressionParameters() { + return "col " + arg1Column + ", val "+ arg2Scalar + ", val "+ arg3Scalar; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java index 06ba8f8..4c6015e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java @@ -166,6 +166,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + arg1Column + ", col "+ arg2Column + ", col "+ arg3Column; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java index ca11a55..c8367c6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java @@ -177,6 +177,11 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + return "col " + arg1Column + ", col "+ arg2Column + ", col "+ arg3Column; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java index 4e09448..8b18ae0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -164,6 +166,11 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + return "col " + arg1Column + ", col "+ arg2Column + ", val "+ new String(arg3Scalar, StandardCharsets.UTF_8); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java index 79ed71e..3a0c035 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import 
java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -164,6 +166,11 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + return "col " + arg1Column + ", val "+ new String(arg2Scalar, StandardCharsets.UTF_8) + ", col "+ arg3Column; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java index 2a35970..4a51693 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -133,6 +135,11 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + return "col " + arg1Column + ", val "+ new String(arg2Scalar, StandardCharsets.UTF_8) + ", val "+ new String(arg3Scalar, StandardCharsets.UTF_8); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java index 8441863..8219b3c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java @@ -133,4 +133,9 @@ public int getOutputColumn() { public String getOutputType() { return "long"; } + + @Override + public String vectorExpressionParameters() { + return "col " + arg1Column + ", col "+ arg2Column + ", col "+ arg3Column; + } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java index 6b87ff2..eb0c1c0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java @@ -130,4 +130,10 @@ public int getOutputColumn() { public String getOutputType() { return "timestamp"; } + + @Override + public String vectorExpressionParameters() { + return "col " + arg1Column + ", col "+ arg2Column + ", val "+ arg3Scalar; + } + } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java index 2162f17..3e4a195 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java 
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java @@ -131,4 +131,10 @@ public int getOutputColumn() { public String getOutputType() { return "timestamp"; } + + @Override + public String vectorExpressionParameters() { + return "col " + arg1Column + ", val "+ arg2Scalar + ", col "+ arg3Column; + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java index 707f574..5273131 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java @@ -117,4 +117,10 @@ public int getOutputColumn() { public String getOutputType() { return "timestamp"; } + + @Override + public String vectorExpressionParameters() { + return "col " + arg1Column + ", val "+ arg2Scalar + ", val "+ arg3Scalar; + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java index f19551e..2f6e7b9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java @@ -107,6 +107,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java index 3169bae..583ab7a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java @@ -105,6 +105,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java index 33f50e0..6fa9779 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java @@ -174,6 +174,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java index 68b6a87..f26c8e1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java @@ -141,6 +141,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java index a77d41a..3b3c923 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java @@ -154,6 +154,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java index be717a1..c174d5f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java @@ -135,6 +135,10 @@ public void setOutputColumn(int outputColumn) { this.outputColumn = outputColumn; } + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java index 6ee5daf..dd2c3dc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java @@ -154,6 +154,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java index cd8d723..710ac23 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java @@ -136,6 +136,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java index 053ced9..c8e07f2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java @@ -154,6 +154,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java index 16148f3..a234ae1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java @@ -136,6 +136,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java index 25d52b3..8db8b86 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java @@ -154,6 +154,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java index 927856f..b06a876 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java @@ -136,6 +136,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java index e6e54e9..b44e9bd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java @@ -154,6 +154,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) 
.setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java index ac10a83..ada4312 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java @@ -136,6 +136,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java index 865fdb9..fa667ca 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java @@ -154,6 +154,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java index 789ca3e..7d16ae0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java @@ -136,6 +136,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java index fb15880..babac22 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -154,6 +156,10 @@ public void setInListValues(long [] a) { this.inListValues = a; } + public String vectorExpressionParameters() { + return "col " + colNum + ", values " + Arrays.toString(inListValues); + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java index 3c442da..b1958f2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java @@ -153,6 +153,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java index 09d845c..a4cea31 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java @@ -135,6 +135,10 @@ public void setOutputColumn(int outputColumn) { this.outputColumn = outputColumn; } + public String vectorExpressionParameters() { + return "val " + value + ", col " + colNum; + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java index afc80eb..15ba69b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java @@ -136,6 +136,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java index e2e871d..38984c5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java @@ -136,6 +136,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java index ae675ed..47fb591 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java @@ -135,6 +135,10 @@ public void setOutputColumn(int outputColumn) { this.outputColumn = outputColumn; } + public String vectorExpressionParameters() { + return "val " + value + ", col " + colNum; + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java index c664e35..d5801d7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java @@ -136,6 +136,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java index 776ab9f..b6bbfd1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java @@ -136,6 +136,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java index fbca683..80b79a4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java @@ -135,6 +135,11 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + return "col " + inputColumn; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java index f95c9ec..b8e3489 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java @@ -34,7 +34,7 @@ public abstract class MathFuncDoubleToDouble extends VectorExpression { private static final long serialVersionUID = 1L; - private int colNum; + protected int colNum; private int outputColumn; // Subclasses must override this with a function that implements the desired logic. 
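The private-to-protected change on colNum in the hunk above is what lets concrete subclasses read the column index from their own vectorExpressionParameters() overrides; PosModDoubleToDouble and PosModLongToLong later in this patch do exactly that ("col N, divisor D"). A minimal sketch of the pattern, with a hypothetical subclass name and an assumed func() signature (only the new visibility of colNum comes from the patch):

    // Hypothetical MathFuncDoubleToDouble subclass, for illustration only.
    public class FuncExampleDoubleToDouble extends MathFuncDoubleToDouble {
      private static final long serialVersionUID = 1L;

      @Override
      public double func(double d) {
        return d; // stand-in for the "desired logic" mentioned above
      }

      @Override
      public String vectorExpressionParameters() {
        // Legal only because colNum is now protected rather than private.
        return "col " + colNum;
      }
    }

With the reworked VectorExpression.toString() further down in this patch, such an expression renders as, e.g., FuncExampleDoubleToDouble(col 2) -> 4:double in EXPLAIN output.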
@@ -133,4 +133,9 @@ public void setColNum(int colNum) { public String getOutputType() { return "double"; } + + @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java index 4b4f38d..3b55d06 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java @@ -133,4 +133,9 @@ public void setColNum(int colNum) { public String getOutputType() { return "double"; } + + @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java index 4b1f908..5e36c09 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java @@ -33,7 +33,7 @@ public abstract class MathFuncLongToLong extends VectorExpression { private static final long serialVersionUID = 1L; - private int colNum; + protected int colNum; private int outputColumn; // Subclasses must override this with a function that implements the desired logic. @@ -125,4 +125,9 @@ public void setColNum(int colNum) { public String getOutputType() { return "long"; } + + @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java index ea2a434..1ece4a8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java @@ -122,6 +122,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModDoubleToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModDoubleToDouble.java index 39a3d87..0990095 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModDoubleToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModDoubleToDouble.java @@ -55,6 +55,11 @@ public double getDivisor() { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", divisor " + divisor; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModLongToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModLongToLong.java index 12b7286..4809011 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModLongToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModLongToLong.java @@ -55,6 +55,11 @@ public long getDivisor() { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", divisor " + 
divisor; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/RoundWithNumDigitsDoubleToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/RoundWithNumDigitsDoubleToDouble.java index b8dfb41..4b791b6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/RoundWithNumDigitsDoubleToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/RoundWithNumDigitsDoubleToDouble.java @@ -59,6 +59,11 @@ public void setArg(long l) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", decimalPlaces " + decimalPlaces.get(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java index 77749e9..a906bef 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java @@ -138,6 +138,10 @@ public void setColNum1(int colNum1) { this.colNum1 = colNum1; } + public String vectorExpressionParameters() { + return "col " + colNum1; + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java index 733e2a6..f8517dd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java @@ -108,6 +108,11 @@ public void setColNum(int colNum) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.FILTER) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java index 7159178..b792bbe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java @@ -106,6 +106,11 @@ public void setColNum(int colNum) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.FILTER) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java index f387a5c..b58b49e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java @@ -138,6 +138,11 @@ public void setColNum1(int colNum1) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.FILTER) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java index b914196..cb3870e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java @@ -162,8 +162,12 @@ public String getOutputType() { return "String_Family"; } - @Override - public Descriptor getDescriptor() { + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override + public Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( VectorExpressionDescriptor.Mode.PROJECTION) @@ -174,6 +178,6 @@ public Descriptor getDescriptor() { .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); - } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java index b90e3c0..b1ceb9a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java @@ -175,4 +175,9 @@ public Descriptor getDescriptor() { public void setInListValues(byte [][] a) { this.inListValues = a; } + + @Override + public String vectorExpressionParameters() { + return "col " + inputCol + ", values " + Arrays.toString(inListValues); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java index 1cd3c46..bd44390 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -149,6 +151,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + new String(value, StandardCharsets.UTF_8); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java index 56bc97b..35666d8 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java @@ -440,6 +440,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java index 76602be..cdaf694 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java @@ -156,6 +156,10 @@ public void setOutputColumn(int outputColumn) { this.outputColumn = outputColumn; } + public String vectorExpressionParameters() { + return "col " + colNum; + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java index b98f72d..b1e1dad 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -149,6 +151,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "val " + new String(value, StandardCharsets.UTF_8) + ", col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java index 75a99f0..305d1a7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java @@ -245,6 +245,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", start " + startIdx; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java index 0ff7af6..4a7dbdc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java @@ -272,6 
+272,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", start " + startIdx + ", length " + length; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java index 016a695..527d3b3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java @@ -195,6 +195,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java index 89ef251..c87371f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java @@ -136,6 +136,10 @@ public String getOutputType() { return "String"; } + public String vectorExpressionParameters() { + return "col " + inputColumn; + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java index 8134108..7d25446 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; import java.util.List; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor; @@ -172,4 +173,11 @@ public void setStructColumnExprs(VectorizationContext vContext, } this.fieldVectorColumnTypes = fieldVectorColumnTypes; } + + @Override + public String vectorExpressionParameters() { + return "structExpressions " + Arrays.toString(structExpressions) + + ", fieldVectorColumnTypes " + Arrays.toString(fieldVectorColumnTypes) + + ", structColumnMap " + Arrays.toString(structColumnMap); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java index bc09a3a..5e76de8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.sql.Timestamp; +import java.util.Arrays; import java.util.HashSet; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; @@ -150,4 +151,9 @@ public Descriptor 
getDescriptor() { public void setInListValues(Timestamp[] a) { this.inListValues = a; } + + @Override + public String vectorExpressionParameters() { + return "col " + inputCol + ", values " + Arrays.toString(inListValues); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java index 052d57c..32cf527 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java @@ -133,6 +133,10 @@ public String getOutputType() { return "String"; } + public String vectorExpressionParameters() { + return "col " + inputColumn; + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java index 543d7f0..c0870c8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java @@ -141,6 +141,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "columns " + Arrays.toString(inputColumns); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { // Descriptor is not defined because it takes variable number of arguments with different diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java index 329d381..5e0e7aa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -129,6 +131,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "columns " + Arrays.toString(inputColumns); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { // Descriptor is not defined because it takes variable number of arguments with different // data types. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java index 218f306..3df9acd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java @@ -149,25 +149,42 @@ public void setInputTypes(Type ... 
inputTypes) { return inputTypes; } + public String vectorExpressionParameters() { + return null; + } + @Override public String toString() { StringBuilder b = new StringBuilder(); - b.append(this.getClass().getSimpleName()); - b.append("["); - b.append(this.getOutputColumn()); - b.append(":"); - b.append(this.getOutputType()); - b.append("]"); - if (childExpressions != null) { - b.append("("); - for (int i = 0; i < childExpressions.length; i++) { - b.append(childExpressions[i].toString()); - if (i < childExpressions.length-1) { - b.append(" "); + if (this instanceof IdentityExpression) { + b.append(vectorExpressionParameters()); + } else { + b.append(this.getClass().getSimpleName()); + String vectorExpressionParameters = vectorExpressionParameters(); + if (vectorExpressionParameters != null) { + b.append("("); + b.append(vectorExpressionParameters); + b.append(")"); + } + if (childExpressions != null) { + b.append("(children: "); + for (int i = 0; i < childExpressions.length; i++) { + b.append(childExpressions[i].toString()); + if (i < childExpressions.length-1) { + b.append(", "); + } } + b.append(")"); + } + b.append(" -> "); + int outputColumn = getOutputColumn(); + if (outputColumn != -1) { + b.append(outputColumn); + b.append(":"); } - b.append(")"); + b.append(getOutputType()); } + return b.toString(); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java index 4ce6e20..00e9e03 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java @@ -216,6 +216,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java index 0e09f49..730dc36 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java @@ -261,6 +261,11 @@ public void setNumDay(int numDays) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + numDays; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java index 74ef6a6..f0a676d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java @@ -207,6 +207,11 @@ public void setPositive(boolean isPositive) { this.isPositive = isPositive; } + @Override + public String vectorExpressionParameters() { + return 
"val " + stringValue + ", col " + colNum; + } + public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java index 4edf558..d3c5da2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java @@ -358,6 +358,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java index 15edbdf..4988630 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.io.Text; +import java.nio.charset.StandardCharsets; import java.sql.Date; import java.sql.Timestamp; import java.text.ParseException; @@ -299,6 +300,11 @@ public void setStringValue(byte[] stringValue) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + new String(stringValue, StandardCharsets.UTF_8); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java index 371537a..d5ee1eb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java @@ -298,6 +298,11 @@ public void setStringValue(byte[] stringValue) { } @Override + public String vectorExpressionParameters() { + return "val " + stringValue + ", col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java index 0a3a87a..0255cfa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import 
org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hive.common.util.DateUtils; import com.google.common.base.Preconditions; @@ -150,6 +151,15 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + if (field == -1) { + return "col " + colNum; + } else { + return "col " + colNum + ", field " + DateUtils.getFieldName(field); + } + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java index 45e7a31..6719ce3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java @@ -177,6 +177,15 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + if (fieldStart == -1) { + return "col " + colNum; + } else { + return "col " + colNum + ", fieldStart " + fieldStart + ", fieldLength " + fieldLength; + } + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java index 5fca678..e9000c6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hive.common.util.DateUtils; import com.google.common.base.Preconditions; @@ -148,6 +149,14 @@ public void setOutputColumn(int outputColumn) { this.outputColumn = outputColumn; } + public String vectorExpressionParameters() { + if (field == -1) { + return "col " + colNum; + } else { + return "col " + colNum + ", field " + DateUtils.getFieldName(field); + } + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorAggregateExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorAggregateExpression.java index 96e62cf..0866f63 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorAggregateExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorAggregateExpression.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import 
org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -55,7 +56,25 @@ public abstract void aggregateInputSelection(VectorAggregationBufferRow[] aggreg public boolean hasVariableSize() { return false; } + public abstract VectorExpression inputExpression(); public abstract void init(AggregationDesc desc) throws HiveException; + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(this.getClass().getSimpleName()); + VectorExpression inputExpression = inputExpression(); + if (inputExpression != null) { + sb.append("("); + sb.append(inputExpression.toString()); + sb.append(") -> "); + } else { + sb.append("(*) -> "); + } + ObjectInspector outputObjectInspector = getOutputObjectInspector(); + sb.append(outputObjectInspector.getTypeName()); + return sb.toString(); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgDecimal.java index 4f6d652..74e25ae 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgDecimal.java @@ -93,6 +93,12 @@ public void reset() { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private Object[] partialResult; transient private LongWritable resultCount; transient private HiveDecimalWritable resultSum; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgTimestamp.java index d0a1d0d..483d9dc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgTimestamp.java @@ -84,6 +84,12 @@ public void reset () { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private Object[] partialResult; transient private LongWritable resultCount; transient private DoubleWritable resultSum; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java index 3ecb82e..2139eae 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java @@ -55,6 +55,12 @@ private static final long serialVersionUID = 1L; private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + private long expectedEntries = -1; private ValueProcessor valueProcessor; transient private int bitSetSize = -1; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java index ad190b7..d2446d5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java @@ -40,6 +40,12 @@ private static final long serialVersionUID = 1L; private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + private long expectedEntries = -1; transient private int aggBufferSize = -1; transient private BytesWritable bw = new BytesWritable(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java index cf373a1..494febc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java @@ -60,6 +60,12 @@ public void reset() { } private VectorExpression inputExpression = null; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private final LongWritable result; public VectorUDAFCount(VectorExpression inputExpression) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java index 577977f..dec88cb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java @@ -61,6 +61,12 @@ public void reset() { } private VectorExpression inputExpression = null; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private final LongWritable result; public VectorUDAFCountMerge(VectorExpression inputExpression) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountStar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountStar.java index 72beda8..337ba0a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountStar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountStar.java @@ -57,6 +57,13 @@ public void reset() { } } + + @Override + public VectorExpression inputExpression() { + // None. 
+ return null; + } + transient private final LongWritable result; public VectorUDAFCountStar(VectorExpression inputExpression) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdPopTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdPopTimestamp.java index fa25e6a..8cd3506 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdPopTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdPopTimestamp.java @@ -83,6 +83,12 @@ public void reset () { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private LongWritable resultCount; transient private DoubleWritable resultSum; transient private DoubleWritable resultVariance; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdSampTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdSampTimestamp.java index b3e1fae..61d6977 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdSampTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdSampTimestamp.java @@ -83,6 +83,12 @@ public void reset () { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private LongWritable resultCount; transient private DoubleWritable resultSum; transient private DoubleWritable resultVariance; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal.java index 508c960..b10f66f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal.java @@ -75,6 +75,11 @@ public void reset() { private VectorExpression inputExpression; + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + public VectorUDAFSumDecimal(VectorExpression inputExpression) { this(); this.inputExpression = inputExpression; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarPopTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarPopTimestamp.java index 970ec22..2709b07 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarPopTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarPopTimestamp.java @@ -83,6 +83,12 @@ public void reset () { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private LongWritable resultCount; transient private DoubleWritable resultSum; transient private DoubleWritable resultVariance; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarSampTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarSampTimestamp.java index 9af1a28..03dce1e 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarSampTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarSampTimestamp.java @@ -83,6 +83,12 @@ public void reset () { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private LongWritable resultCount; transient private DoubleWritable resultSum; transient private DoubleWritable resultVariance; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java index 476446a..f854132 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java @@ -22,6 +22,7 @@ import java.util.Arrays; import java.util.List; import java.util.Map; + import org.apache.commons.lang.ArrayUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -56,6 +57,7 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; import org.apache.hadoop.hive.serde2.objectinspector.StructField; @@ -63,6 +65,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import com.google.common.base.Preconditions; + /** * This class is common operator class for native vectorized map join. * @@ -72,7 +76,33 @@ */ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implements VectorizationContextRegion { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinCommonOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinCommonOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected abstract String getLoggingPrefix(); + + // For debug tracing: information about the map or reduce task, operator, operator class, etc. + protected transient String loggingPrefix; + + protected String getLoggingPrefix(String className) { + if (loggingPrefix == null) { + initLoggingPrefix(className); + } + return loggingPrefix; + } + + protected void initLoggingPrefix(String className) { + loggingPrefix = className; + } + + //------------------------------------------------------------------------------------------------ + + protected VectorMapJoinDesc vectorDesc; + + protected VectorMapJoinInfo vectorMapJoinInfo; // Whether this operator is an outer join. protected boolean isOuterJoin; @@ -88,10 +118,10 @@ // a mixture of input big table columns and new scratch columns. protected VectorizationContext vOutContext; - // The output column projection of the vectorized row batch. And, the type names of the output + // The output column projection of the vectorized row batch. And, the type infos of the output + // columns. 
protected int[] outputProjection; - protected String[] outputTypeNames; + protected TypeInfo[] outputTypeInfos; // These are the vectorized batch expressions for filtering, key expressions, and value // expressions. @@ -101,15 +131,17 @@ // This is map of which vectorized row batch columns are the big table key columns. Since // we may have key expressions that produce new scratch columns, we need a mapping. - // And, we have their type names. + // And, we have their type infos. protected int[] bigTableKeyColumnMap; - protected ArrayList bigTableKeyTypeNames; + protected String[] bigTableKeyColumnNames; + protected TypeInfo[] bigTableKeyTypeInfos; // Similarly, this is map of which vectorized row batch columns are the big table value columns. // Since we may have value expressions that produce new scratch columns, we need a mapping. - // And, we have their type names. + // And, we have their type infos. protected int[] bigTableValueColumnMap; - protected ArrayList bigTableValueTypeNames; + protected String[] bigTableValueColumnNames; + protected TypeInfo[] bigTableValueTypeInfos; // This is a mapping of which big table columns (input and key/value expressions) will be // part of the big table portion of the join output result. @@ -124,6 +156,8 @@ // to output batch scratch columns for the small table portion. protected VectorColumnSourceMapping smallTableMapping; + protected VectorColumnSourceMapping projectionMapping; + // These are the output columns for the small table and the outer small table keys. protected int[] smallTableOutputVectorColumns; protected int[] bigTableOuterKeyOutputVectorColumns; @@ -137,9 +171,6 @@ // transient. //--------------------------------------------------------------------------- - // For debug tracing: the name of the map or reduce task. - protected transient String taskName; - // The threshold where we should use a repeating vectorized row batch optimization for // generating join output results. protected transient boolean useOverflowRepeatedThreshold; @@ -192,6 +223,9 @@ public VectorMapJoinCommonOperator(CompilationOpContext ctx, MapJoinDesc desc = (MapJoinDesc) conf; this.conf = desc; + vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc(); + vectorMapJoinInfo = vectorDesc.getVectorMapJoinInfo(); + Preconditions.checkState(vectorMapJoinInfo != null); this.vContext = vContext; @@ -210,217 +244,28 @@ public VectorMapJoinCommonOperator(CompilationOpContext ctx, bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable), VectorExpressionDescriptor.Mode.FILTER); - List keyDesc = desc.getKeys().get(posBigTable); - bigTableKeyExpressions = vContext.getVectorExpressions(keyDesc); - - // Since a key expression can be a calculation and the key will go into a scratch column, - // we need the mapping and type information. 
- bigTableKeyColumnMap = new int[bigTableKeyExpressions.length]; - bigTableKeyTypeNames = new ArrayList(); - boolean onlyColumns = true; - for (int i = 0; i < bigTableKeyColumnMap.length; i++) { - VectorExpression ve = bigTableKeyExpressions[i]; - if (!IdentityExpression.isColumnOnly(ve)) { - onlyColumns = false; - } - bigTableKeyTypeNames.add(keyDesc.get(i).getTypeString()); - bigTableKeyColumnMap[i] = ve.getOutputColumn(); - } - if (onlyColumns) { - bigTableKeyExpressions = null; - } - - List bigTableExprs = desc.getExprs().get(posBigTable); - bigTableValueExpressions = vContext.getVectorExpressions(bigTableExprs); - - /* - * Similarly, we need a mapping since a value expression can be a calculation and the value - * will go into a scratch column. - */ - bigTableValueColumnMap = new int[bigTableValueExpressions.length]; - bigTableValueTypeNames = new ArrayList(); - onlyColumns = true; - for (int i = 0; i < bigTableValueColumnMap.length; i++) { - VectorExpression ve = bigTableValueExpressions[i]; - if (!IdentityExpression.isColumnOnly(ve)) { - onlyColumns = false; - } - bigTableValueTypeNames.add(bigTableExprs.get(i).getTypeString()); - bigTableValueColumnMap[i] = ve.getOutputColumn(); - } - if (onlyColumns) { - bigTableValueExpressions = null; - } - - determineCommonInfo(isOuterJoin); - } - - protected void determineCommonInfo(boolean isOuter) throws HiveException { - - bigTableRetainedMapping = new VectorColumnOutputMapping("Big Table Retained Mapping"); - - bigTableOuterKeyMapping = new VectorColumnOutputMapping("Big Table Outer Key Mapping"); - - // The order of the fields in the LazyBinary small table value must be used, so - // we use the source ordering flavor for the mapping. - smallTableMapping = new VectorColumnSourceMapping("Small Table Mapping"); - - // We use a mapping object here so we can build the projection in any order and - // get the ordered by 0 to n-1 output columns at the end. - // - // Also, to avoid copying a big table key into the small table result area for inner joins, - // we reference it with the projection so there can be duplicate output columns - // in the projection. - VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping"); - - /* - * Gather up big and small table output result information from the MapJoinDesc. - */ - List bigTableRetainList = conf.getRetainList().get(posBigTable); - int bigTableRetainSize = bigTableRetainList.size(); - - int[] smallTableIndices; - int smallTableIndicesSize; - List smallTableExprs = conf.getExprs().get(posSingleVectorMapJoinSmallTable); - if (conf.getValueIndices() != null && conf.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) { - smallTableIndices = conf.getValueIndices().get(posSingleVectorMapJoinSmallTable); - smallTableIndicesSize = smallTableIndices.length; - } else { - smallTableIndices = null; - smallTableIndicesSize = 0; - } - - List smallTableRetainList = conf.getRetainList().get(posSingleVectorMapJoinSmallTable); - int smallTableRetainSize = smallTableRetainList.size(); - - int smallTableResultSize = 0; - if (smallTableIndicesSize > 0) { - smallTableResultSize = smallTableIndicesSize; - } else if (smallTableRetainSize > 0) { - smallTableResultSize = smallTableRetainSize; - } - - /* - * Determine the big table retained mapping first so we can optimize out (with - * projection) copying inner join big table keys in the subsequent small table results section. - */ - int nextOutputColumn = (order[0] == posBigTable ? 
0 : smallTableResultSize); - for (int i = 0; i < bigTableRetainSize; i++) { - - // Since bigTableValueExpressions may do a calculation and produce a scratch column, we - // need to map to the right batch column. - - int retainColumn = bigTableRetainList.get(i); - int batchColumnIndex = bigTableValueColumnMap[retainColumn]; - String typeName = bigTableValueTypeNames.get(i); - - // With this map we project the big table batch to make it look like an output batch. - projectionMapping.add(nextOutputColumn, batchColumnIndex, typeName); - - // Collect columns we copy from the big table batch to the overflow batch. - if (!bigTableRetainedMapping.containsOutputColumn(batchColumnIndex)) { - // Tolerate repeated use of a big table column. - bigTableRetainedMapping.add(batchColumnIndex, batchColumnIndex, typeName); - } - - nextOutputColumn++; - } - - /* - * Now determine the small table results. - */ - int firstSmallTableOutputColumn; - firstSmallTableOutputColumn = (order[0] == posBigTable ? bigTableRetainSize : 0); - int smallTableOutputCount = 0; - nextOutputColumn = firstSmallTableOutputColumn; - - // Small table indices has more information (i.e. keys) than retain, so use it if it exists... - if (smallTableIndicesSize > 0) { - smallTableOutputCount = smallTableIndicesSize; - - for (int i = 0; i < smallTableIndicesSize; i++) { - if (smallTableIndices[i] >= 0) { - - // Zero and above numbers indicate a big table key is needed for - // small table result "area". - - int keyIndex = smallTableIndices[i]; - - // Since bigTableKeyExpressions may do a calculation and produce a scratch column, we - // need to map the right column. - int batchKeyColumn = bigTableKeyColumnMap[keyIndex]; - String typeName = bigTableKeyTypeNames.get(keyIndex); - - if (!isOuter) { - - // Optimize inner join keys of small table results. - - // Project the big table key into the small table result "area". - projectionMapping.add(nextOutputColumn, batchKeyColumn, typeName); - - if (!bigTableRetainedMapping.containsOutputColumn(batchKeyColumn)) { - // If necessary, copy the big table key into the overflow batch's small table - // result "area". - bigTableRetainedMapping.add(batchKeyColumn, batchKeyColumn, typeName); - } - } else { - - // For outer joins, since the small table key can be null when there is no match, - // we must have a physical (scratch) column for those keys. We cannot use the - // projection optimization used by inner joins above. 
- - TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); - int scratchColumn = vOutContext.allocateScratchColumn(typeInfo); - projectionMapping.add(nextOutputColumn, scratchColumn, typeName); + bigTableKeyColumnMap = vectorMapJoinInfo.getBigTableKeyColumnMap(); + bigTableKeyColumnNames = vectorMapJoinInfo.getBigTableKeyColumnNames(); + bigTableKeyTypeInfos = vectorMapJoinInfo.getBigTableKeyTypeInfos(); + bigTableKeyExpressions = vectorMapJoinInfo.getBigTableKeyExpressions(); - bigTableRetainedMapping.add(batchKeyColumn, scratchColumn, typeName); + bigTableValueColumnMap = vectorMapJoinInfo.getBigTableValueColumnMap(); + bigTableValueColumnNames = vectorMapJoinInfo.getBigTableValueColumnNames(); + bigTableValueTypeInfos = vectorMapJoinInfo.getBigTableValueTypeInfos(); + bigTableValueExpressions = vectorMapJoinInfo.getBigTableValueExpressions(); - bigTableOuterKeyMapping.add(batchKeyColumn, scratchColumn, typeName); - } - } else { + bigTableRetainedMapping = vectorMapJoinInfo.getBigTableRetainedMapping(); - // Negative numbers indicate a column to be (deserialize) read from the small table's - // LazyBinary value row. - int smallTableValueIndex = -smallTableIndices[i] - 1; + bigTableOuterKeyMapping = vectorMapJoinInfo.getBigTableOuterKeyMapping(); - String typeName = smallTableExprs.get(i).getTypeString(); + smallTableMapping = vectorMapJoinInfo.getSmallTableMapping(); - // Make a new big table scratch column for the small table value. - TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); - int scratchColumn = vOutContext.allocateScratchColumn(typeInfo); - projectionMapping.add(nextOutputColumn, scratchColumn, typeName); + projectionMapping = vectorMapJoinInfo.getProjectionMapping(); - smallTableMapping.add(smallTableValueIndex, scratchColumn, typeName); - } - nextOutputColumn++; - } - } else if (smallTableRetainSize > 0) { - smallTableOutputCount = smallTableRetainSize; - - // Only small table values appear in join output result. - - for (int i = 0; i < smallTableRetainSize; i++) { - int smallTableValueIndex = smallTableRetainList.get(i); - - // Make a new big table scratch column for the small table value. - TypeInfo typeInfo = smallTableExprs.get(i).getTypeInfo(); - int scratchColumn = vOutContext.allocateScratchColumn(typeInfo); - - String typeName = smallTableExprs.get(i).getTypeString(); - projectionMapping.add(nextOutputColumn, scratchColumn, typeName); - - smallTableMapping.add(smallTableValueIndex, scratchColumn, typeName); - nextOutputColumn++; - } - } - - // Convert dynamic arrays and maps to simple arrays. - - bigTableRetainedMapping.finalize(); - - bigTableOuterKeyMapping.finalize(); + determineCommonInfo(isOuterJoin); + } - smallTableMapping.finalize(); + protected void determineCommonInfo(boolean isOuter) throws HiveException { bigTableOuterKeyOutputVectorColumns = bigTableOuterKeyMapping.getOutputColumns(); smallTableOutputVectorColumns = smallTableMapping.getOutputColumns(); @@ -432,46 +277,37 @@ protected void determineCommonInfo(boolean isOuter) throws HiveException { smallTableByteColumnVectorColumns = getByteColumnVectorColumns(smallTableMapping); - projectionMapping.finalize(); - - // Verify we added an entry for each output. 
- assert projectionMapping.isSourceSequenceGood(); - outputProjection = projectionMapping.getOutputColumns(); - outputTypeNames = projectionMapping.getTypeNames(); + outputTypeInfos = projectionMapping.getTypeInfos(); if (isLogDebugEnabled) { int[] orderDisplayable = new int[order.length]; for (int i = 0; i < order.length; i++) { orderDisplayable[i] = (int) order[i]; } - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor order " + Arrays.toString(orderDisplayable)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor posBigTable " + (int) posBigTable); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor posSingleVectorMapJoinSmallTable " + (int) posSingleVectorMapJoinSmallTable); - - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnMap " + Arrays.toString(bigTableKeyColumnMap)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableKeyTypeNames " + bigTableKeyTypeNames); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor order " + Arrays.toString(orderDisplayable)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posBigTable " + (int) posBigTable); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posSingleVectorMapJoinSmallTable " + (int) posSingleVectorMapJoinSmallTable); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableValueColumnMap " + Arrays.toString(bigTableValueColumnMap)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableValueTypeNames " + bigTableValueTypeNames); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnMap " + Arrays.toString(bigTableKeyColumnMap)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnNames " + Arrays.toString(bigTableKeyColumnNames)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyTypeInfos " + Arrays.toString(bigTableKeyTypeInfos)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableIndices " + Arrays.toString(smallTableIndices)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableRetainList " + smallTableRetainList); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnMap " + Arrays.toString(bigTableValueColumnMap)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnNames " + Arrays.toString(bigTableValueColumnNames)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueTypeInfos " + Arrays.toString(bigTableValueTypeInfos)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor firstSmallTableOutputColumn " + firstSmallTableOutputColumn); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableOutputCount " + smallTableOutputCount); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableRetainedMapping " + bigTableRetainedMapping.toString()); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableRetainedMapping " + bigTableRetainedMapping.toString()); + LOG.debug(getLoggingPrefix() + " 
VectorMapJoinCommonOperator constructor bigTableOuterKeyMapping " + bigTableOuterKeyMapping.toString()); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableOuterKeyMapping " + bigTableOuterKeyMapping.toString()); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableMapping " + smallTableMapping.toString()); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableMapping " + smallTableMapping.toString()); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableByteColumnVectorColumns " + Arrays.toString(bigTableByteColumnVectorColumns)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableByteColumnVectorColumns " + Arrays.toString(smallTableByteColumnVectorColumns)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableByteColumnVectorColumns " + Arrays.toString(bigTableByteColumnVectorColumns)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableByteColumnVectorColumns " + Arrays.toString(smallTableByteColumnVectorColumns)); - - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputProjection " + Arrays.toString(outputProjection)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputTypeNames " + Arrays.toString(outputTypeNames)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputProjection " + Arrays.toString(outputProjection)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputTypeInfos " + Arrays.toString(outputTypeInfos)); } setupVOutContext(conf.getOutputColumnNames()); @@ -485,10 +321,10 @@ protected void determineCommonInfo(boolean isOuter) throws HiveException { ArrayList list = new ArrayList(); int count = mapping.getCount(); int[] outputColumns = mapping.getOutputColumns(); - String[] typeNames = mapping.getTypeNames(); + TypeInfo[] typeInfos = mapping.getTypeInfos(); for (int i = 0; i < count; i++) { int outputColumn = outputColumns[i]; - String typeName = typeNames[i]; + String typeName = typeInfos[i].getTypeName(); if (VectorizationContext.isStringFamily(typeName)) { list.add(outputColumn); } @@ -503,10 +339,10 @@ protected void determineCommonInfo(boolean isOuter) throws HiveException { */ protected void setupVOutContext(List outputColumnNames) { if (isLogDebugEnabled) { - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputColumnNames " + outputColumnNames); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputColumnNames " + outputColumnNames); } if (outputColumnNames.size() != outputProjection.length) { - throw new RuntimeException("Output column names " + outputColumnNames + " length and output projection " + Arrays.toString(outputProjection) + " / " + Arrays.toString(outputTypeNames) + " length mismatch"); + throw new RuntimeException("Output column names " + outputColumnNames + " length and output projection " + Arrays.toString(outputProjection) + " / " + Arrays.toString(outputTypeInfos) + " length mismatch"); } vOutContext.resetProjectionColumns(); for (int i = 0; i < outputColumnNames.size(); ++i) { @@ -515,7 +351,7 @@ protected void setupVOutContext(List outputColumnNames) { vOutContext.addProjectionColumn(columnName, outputColumn); if (isLogDebugEnabled) { - LOG.debug(taskName + ", 
" + getOperatorId() + " VectorMapJoinCommonOperator constructor addProjectionColumn " + i + " columnName " + columnName + " outputColumn " + outputColumn); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor addProjectionColumn " + i + " columnName " + columnName + " outputColumn " + outputColumn); } } } @@ -525,7 +361,7 @@ protected void setupVOutContext(List outputColumnNames) { */ @Override protected HashTableLoader getHashTableLoader(Configuration hconf) { - VectorMapJoinDesc vectorDesc = conf.getVectorDesc(); + VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) conf.getVectorDesc(); HashTableImplementationType hashTableImplementationType = vectorDesc.hashTableImplementationType(); HashTableLoader hashTableLoader; switch (vectorDesc.hashTableImplementationType()) { @@ -549,15 +385,6 @@ protected HashTableLoader getHashTableLoader(Configuration hconf) { protected void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); - if (isLogDebugEnabled) { - // Determine the name of our map or reduce task for debug tracing. - BaseWork work = Utilities.getMapWork(hconf); - if (work == null) { - work = Utilities.getReduceWork(hconf); - } - taskName = work.getName(); - } - /* * Get configuration parameters. */ @@ -573,9 +400,8 @@ protected void initializeOp(Configuration hconf) throws HiveException { smallTableVectorDeserializeRow = new VectorDeserializeRow( new LazyBinaryDeserializeRead( - VectorizedBatchUtil.typeInfosFromTypeNames( - smallTableMapping.getTypeNames()), - /* useExternalBuffer */ true)); + smallTableMapping.getTypeInfos(), + /* useExternalBuffer */ true)); smallTableVectorDeserializeRow.init(smallTableMapping.getOutputColumns()); } @@ -599,13 +425,13 @@ protected void initializeOp(Configuration hconf) throws HiveException { if (isLogDebugEnabled) { int[] currentScratchColumns = vOutContext.currentScratchColumns(); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator initializeOp currentScratchColumns " + Arrays.toString(currentScratchColumns)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator initializeOp currentScratchColumns " + Arrays.toString(currentScratchColumns)); StructObjectInspector structOutputObjectInspector = (StructObjectInspector) outputObjInspector; List fields = structOutputObjectInspector.getAllStructFieldRefs(); int i = 0; for (StructField field : fields) { - LOG.debug("VectorMapJoinInnerBigOnlyCommonOperator initializeOp " + i + " field " + field.getFieldName() + " type " + field.getFieldObjectInspector().getTypeName()); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator initializeOp " + i + " field " + field.getFieldName() + " type " + field.getFieldObjectInspector().getTypeName()); i++; } } @@ -616,7 +442,7 @@ protected void completeInitializationOp(Object[] os) throws HiveException { // setup mapJoinTables and serdes super.completeInitializationOp(os); - VectorMapJoinDesc vectorDesc = conf.getVectorDesc(); + VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) conf.getVectorDesc(); HashTableImplementationType hashTableImplementationType = vectorDesc.hashTableImplementationType(); switch (vectorDesc.hashTableImplementationType()) { case OPTIMIZED: @@ -658,7 +484,7 @@ protected VectorizedRowBatch setupOverflowBatch() throws HiveException { // First, just allocate just the projection columns we will be using. 
for (int i = 0; i < outputProjection.length; i++) { int outputColumn = outputProjection[i]; - String typeName = outputTypeNames[i]; + String typeName = outputTypeInfos[i].getTypeName(); allocateOverflowBatchColumnVector(overflowBatch, outputColumn, typeName); } @@ -690,7 +516,7 @@ private void allocateOverflowBatchColumnVector(VectorizedRowBatch overflowBatch, overflowBatch.cols[outputColumn] = VectorizedBatchUtil.createColumnVector(typeInfo); if (isLogDebugEnabled) { - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator initializeOp overflowBatch outputColumn " + outputColumn + " class " + overflowBatch.cols[outputColumn].getClass().getSimpleName()); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator initializeOp overflowBatch outputColumn " + outputColumn + " class " + overflowBatch.cols[outputColumn].getClass().getSimpleName()); } } } @@ -727,9 +553,9 @@ protected void commonSetup(VectorizedRowBatch batch) throws HiveException { } protected void displayBatchColumns(VectorizedRowBatch batch, String batchName) { - LOG.debug("commonSetup " + batchName + " column count " + batch.numCols); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator commonSetup " + batchName + " column count " + batch.numCols); for (int column = 0; column < batch.numCols; column++) { - LOG.debug("commonSetup " + batchName + " column " + column + " type " + (batch.cols[column] == null ? "NULL" : batch.cols[column].getClass().getSimpleName())); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator commonSetup " + batchName + " column " + column + " type " + (batch.cols[column] == null ? "NULL" : batch.cols[column].getClass().getSimpleName())); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java index 0bba141..43f3951 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java @@ -45,8 +45,17 @@ public class VectorMapJoinInnerBigOnlyLongOperator extends VectorMapJoinInnerBigOnlyGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerBigOnlyLongOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinInnerBigOnlyLongOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java index 621804b..95fb0c2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java @@ -40,6 +40,8 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output; import 
org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +import com.google.common.base.Preconditions; + /* * Specialized class for doing a vectorized map join that is an inner join on Multi-Key * and only big table columns appear in the join result so a hash multi-set is used. @@ -48,8 +50,17 @@ public class VectorMapJoinInnerBigOnlyMultiKeyOperator extends VectorMapJoinInnerBigOnlyGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerBigOnlyMultiKeyOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinInnerBigOnlyMultiKeyOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) @@ -114,7 +125,7 @@ public void process(Object row, int tag) throws HiveException { keyVectorSerializeWrite = new VectorSerializeRow( new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeNames, bigTableKeyColumnMap); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); currentKeyOutput = new Output(); saveKeyOutput = new Output(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java index 10e75ab..044e3e6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java @@ -46,8 +46,17 @@ public class VectorMapJoinInnerBigOnlyStringOperator extends VectorMapJoinInnerBigOnlyGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerBigOnlyStringOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinInnerBigOnlyStringOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java index 804d69c..c85e1d8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java @@ -44,8 +44,17 @@ public class VectorMapJoinInnerLongOperator extends VectorMapJoinInnerGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerLongOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static 
final String CLASS_NAME = VectorMapJoinInnerLongOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java index fcfa0bd..a108cd0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java @@ -39,6 +39,8 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +import com.google.common.base.Preconditions; + /* * Specialized class for doing a vectorized map join that is an inner join on a Multi-Key * using a hash map. @@ -46,8 +48,17 @@ public class VectorMapJoinInnerMultiKeyOperator extends VectorMapJoinInnerGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerMultiKeyOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinInnerMultiKeyOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) @@ -112,7 +123,7 @@ public void process(Object row, int tag) throws HiveException { keyVectorSerializeWrite = new VectorSerializeRow( new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeNames, bigTableKeyColumnMap); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); currentKeyOutput = new Output(); saveKeyOutput = new Output(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java index 0f9baae..3211d7d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java @@ -45,8 +45,17 @@ public class VectorMapJoinInnerStringOperator extends VectorMapJoinInnerGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerStringOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinInnerStringOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) diff --git 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java index 1149a9d..b02e6fd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java @@ -45,8 +45,17 @@ public class VectorMapJoinLeftSemiLongOperator extends VectorMapJoinLeftSemiGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerBigOnlyLongOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinLeftSemiLongOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java index e0baebc..36b8f3f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java @@ -40,6 +40,8 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +import com.google.common.base.Preconditions; + /* * Specialized class for doing a vectorized map join that is an left semi join on Multi-Key * using hash set. 
@@ -47,8 +49,17 @@ public class VectorMapJoinLeftSemiMultiKeyOperator extends VectorMapJoinLeftSemiGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerBigOnlyLongOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinLeftSemiMultiKeyOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) @@ -113,7 +124,7 @@ public void process(Object row, int tag) throws HiveException { keyVectorSerializeWrite = new VectorSerializeRow( new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeNames, bigTableKeyColumnMap); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); currentKeyOutput = new Output(); saveKeyOutput = new Output(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java index 49e1177..0b3de0a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java @@ -46,8 +46,17 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerBigOnlyLongOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinLeftSemiStringOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java index 58bd0ab..72309e8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java @@ -45,8 +45,17 @@ */ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinOuterLongOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinOuterLongOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // 
(none) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java index 7f9afd2..a4fc7d3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java @@ -40,6 +40,8 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +import com.google.common.base.Preconditions; + /* * Specialized class for doing a vectorized map join that is an outer join on Multi-Key * using a hash map. @@ -47,8 +49,17 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinOuterMultiKeyOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinOuterMultiKeyOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) @@ -112,7 +123,7 @@ public void process(Object row, int tag) throws HiveException { keyVectorSerializeWrite = new VectorSerializeRow( new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeNames, bigTableKeyColumnMap); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); currentKeyOutput = new Output(); saveKeyOutput = new Output(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java index 8ed1ed4..6e7e5cb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java @@ -45,8 +45,17 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinOuterStringOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinOuterStringOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java index 9f3b107..069cc9a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java @@ -94,7 +94,7 @@ public VectorMapJoinHashTable vectorMapJoinHashTable() { private VectorMapJoinFastHashTable createHashTable(int newThreshold) { boolean isOuterJoin = !desc.isNoOuterJoin(); - VectorMapJoinDesc vectorDesc = desc.getVectorDesc(); + VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc(); HashTableImplementationType hashTableImplementationType = vectorDesc.hashTableImplementationType(); HashTableKind hashTableKind = vectorDesc.hashTableKind(); HashTableKeyType hashTableKeyType = vectorDesc.hashTableKeyType(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java index f34b1cd..111a6d2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java @@ -40,7 +40,7 @@ public static VectorMapJoinOptimizedHashTable createHashTable(MapJoinDesc desc, ReusableGetAdaptor hashMapRowGetter = mapJoinTableContainer.createGetter(refKey); boolean isOuterJoin = !desc.isNoOuterJoin(); - VectorMapJoinDesc vectorDesc = desc.getVectorDesc(); + VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc(); HashTableKind hashTableKind = vectorDesc.hashTableKind(); HashTableKeyType hashTableKeyType = vectorDesc.hashTableKeyType(); boolean minMaxEnabled = vectorDesc.minMaxEnabled(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java index 8133aef..42ca4b7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java @@ -160,7 +160,7 @@ public VectorReduceSinkCommonOperator(CompilationOpContext ctx, ReduceSinkDesc desc = (ReduceSinkDesc) conf; this.conf = desc; - vectorDesc = desc.getVectorDesc(); + vectorDesc = (VectorReduceSinkDesc) desc.getVectorDesc(); vectorReduceSinkInfo = vectorDesc.getVectorReduceSinkInfo(); this.vContext = vContext; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java index 51e2d78..0bb0f22 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java @@ -382,6 +382,11 @@ public void setExpr(ExprNodeGenericFuncDesc expr) { } @Override + public String vectorExpressionParameters() { + return expr.getExprString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()).build(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 439950b..fadbc20 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -21,6 +21,7 @@ import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNIFORM; import 
java.io.Serializable; +import java.lang.annotation.Annotation; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -33,6 +34,7 @@ import java.util.Set; import java.util.Stack; import java.util.regex.Pattern; +import org.apache.commons.lang.ArrayUtils; import org.apache.calcite.util.Pair; import org.apache.commons.lang.ArrayUtils; @@ -43,6 +45,8 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.*; import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; +import org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; import org.apache.hadoop.hive.ql.exec.spark.SparkTask; import org.apache.hadoop.hive.ql.exec.tez.TezTask; @@ -62,7 +66,11 @@ import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator; import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkMultiKeyOperator; import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkStringOperator; +import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator; @@ -74,6 +82,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; @@ -92,23 +101,43 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc; import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc; import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; +import org.apache.hadoop.hive.ql.plan.FilterDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.HashTableSinkDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; +import org.apache.hadoop.hive.ql.plan.LimitDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MapWork; +import org.apache.hadoop.hive.ql.plan.MapredLocalWork; +import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.apache.hadoop.hive.ql.plan.VectorAppMasterEventDesc; +import org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc; +import org.apache.hadoop.hive.ql.plan.VectorFilterDesc; +import 
org.apache.hadoop.hive.ql.plan.VectorTableScanDesc; +import org.apache.hadoop.hive.ql.plan.VectorizationCondition; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; +import org.apache.hadoop.hive.ql.plan.VectorSparkHashTableSinkDesc; +import org.apache.hadoop.hive.ql.plan.VectorSparkPartitionPruningSinkDesc; +import org.apache.hadoop.hive.ql.plan.VectorLimitDesc; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; import org.apache.hadoop.hive.ql.plan.VectorPartitionConversion; +import org.apache.hadoop.hive.ql.plan.VectorSMBJoinDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.SMBJoinDesc; import org.apache.hadoop.hive.ql.plan.SparkHashTableSinkDesc; +import org.apache.hadoop.hive.ql.optimizer.spark.SparkPartitionPruningSinkDesc; import org.apache.hadoop.hive.ql.plan.SparkWork; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; @@ -118,10 +147,13 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.OperatorVariation; import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorDeserializeType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo; import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc; +import org.apache.hadoop.hive.ql.plan.VectorSelectDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.ql.udf.UDFAcos; import org.apache.hadoop.hive.ql.udf.UDFAsin; @@ -171,6 +203,8 @@ import org.apache.hadoop.hive.serde2.NullStructSerDe; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; @@ -183,6 +217,9 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.mapred.TextInputFormat; +import org.apache.hive.common.util.AnnotationUtils; +import org.apache.hive.common.util.HiveStringUtils; +import org.apache.hive.common.util.ReflectionUtil; import com.google.common.base.Preconditions; @@ -225,23 +262,50 @@ supportedDataTypesPattern = Pattern.compile(patternBuilder.toString()); } - List<Task<? extends Serializable>> vectorizableTasks = + private List<Task<? extends Serializable>> vectorizableTasks = new ArrayList<Task<? extends Serializable>>(); - Set<Class<?>> supportedGenericUDFs = new HashSet<Class<?>>(); + private Set<Class<?>> supportedGenericUDFs = new HashSet<Class<?>>(); - Set<String> supportedAggregationUdfs = new HashSet<String>(); + private Set<String> supportedAggregationUdfs = new HashSet<String>(); private HiveConf hiveConf; private boolean isSpark; - boolean useVectorizedInputFileFormat; - boolean useVectorDeserialize; - boolean useRowDeserialize; + private boolean useVectorizedInputFileFormat; + private boolean useVectorDeserialize; + private boolean useRowDeserialize; + private boolean isReduceVectorizationEnabled; + + 
private boolean isSchemaEvolution; - boolean isSchemaEvolution; + private HiveVectorAdaptorUsageMode hiveVectorAdaptorUsageMode; - HiveVectorAdaptorUsageMode hiveVectorAdaptorUsageMode; + private BaseWork currentBaseWork; + private Operator<? extends OperatorDesc> currentOperator; + + public void testSetCurrentBaseWork(BaseWork testBaseWork) { + currentBaseWork = testBaseWork; + } + + private void setNodeIssue(String issue) { + currentBaseWork.setNotVectorizedReason( + VectorizerReason.createNodeIssue(issue)); + } + + private void setOperatorIssue(String issue) { + currentBaseWork.setNotVectorizedReason( + VectorizerReason.createOperatorIssue(currentOperator, issue)); + } + + private void setExpressionIssue(String expressionTitle, String issue) { + currentBaseWork.setNotVectorizedReason( + VectorizerReason.createExpressionIssue(currentOperator, expressionTitle, issue)); + } + + private void clearNotVectorizedReason() { + currentBaseWork.setNotVectorizedReason(null); + } public Vectorizer() { @@ -380,6 +444,10 @@ public Vectorizer() { int partitionColumnCount; boolean useVectorizedInputFileFormat; + boolean groupByVectorOutput; + boolean allNative; + boolean usesVectorUDFAdaptor; + String[] scratchTypeNameArray; Set<Operator<? extends OperatorDesc>> nonVectorizedOps; @@ -390,6 +458,12 @@ public Vectorizer() { partitionColumnCount = 0; } + public void assume() { + groupByVectorOutput = true; + allNative = true; + usesVectorUDFAdaptor = false; + } + public void setAllColumnNames(List<String> allColumnNames) { this.allColumnNames = allColumnNames; } @@ -405,9 +479,19 @@ public void setPartitionColumnCount(int partitionColumnCount) { public void setScratchTypeNameArray(String[] scratchTypeNameArray) { this.scratchTypeNameArray = scratchTypeNameArray; } + public void setGroupByVectorOutput(boolean groupByVectorOutput) { + this.groupByVectorOutput = groupByVectorOutput; + } + public void setAllNative(boolean allNative) { + this.allNative = allNative; + } + public void setUsesVectorUDFAdaptor(boolean usesVectorUDFAdaptor) { + this.usesVectorUDFAdaptor = usesVectorUDFAdaptor; + } public void setUseVectorizedInputFileFormat(boolean useVectorizedInputFileFormat) { this.useVectorizedInputFileFormat = useVectorizedInputFileFormat; } + public void setNonVectorizedOps(Set<Operator<? extends OperatorDesc>> nonVectorizedOps) { this.nonVectorizedOps = nonVectorizedOps; } @@ -439,7 +523,14 @@ public void transferToBaseWork(BaseWork baseWork) { scratchTypeNameArray); baseWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx); - baseWork.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat); + if (baseWork instanceof MapWork) { + MapWork mapWork = (MapWork) baseWork; + mapWork.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat); + } + + baseWork.setAllNative(allNative); + baseWork.setGroupByVectorOutput(groupByVectorOutput); + baseWork.setUsesVectorUDFAdaptor(usesVectorUDFAdaptor); } } @@ -456,17 +547,29 @@ public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException { Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd; if (currTask instanceof MapRedTask) { - convertMapWork(((MapRedTask) currTask).getWork().getMapWork(), false); + MapredWork mapredWork = ((MapRedTask) currTask).getWork(); + convertMapWork(mapredWork.getMapWork(), false); + ReduceWork reduceWork = mapredWork.getReduceWork(); + if (reduceWork != null) { + // Always set the EXPLAIN conditions. + setReduceWorkExplainConditions(reduceWork); + + // We do not vectorize MR Reduce. 
+ } } else if (currTask instanceof TezTask) { TezWork work = ((TezTask) currTask).getWork(); - for (BaseWork w: work.getAllWork()) { - if (w instanceof MapWork) { - convertMapWork((MapWork) w, true); - } else if (w instanceof ReduceWork) { - // We are only vectorizing Reduce under Tez. - if (HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED)) { - convertReduceWork((ReduceWork) w, true); + for (BaseWork baseWork: work.getAllWork()) { + if (baseWork instanceof MapWork) { + convertMapWork((MapWork) baseWork, true); + } else if (baseWork instanceof ReduceWork) { + ReduceWork reduceWork = (ReduceWork) baseWork; + + // Always set the EXPLAIN conditions. + setReduceWorkExplainConditions(reduceWork); + + // We are only vectorizing Reduce under Tez/Spark. + if (isReduceVectorizationEnabled) { + convertReduceWork(reduceWork); } } } @@ -474,22 +577,51 @@ public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) SparkWork sparkWork = (SparkWork) currTask.getWork(); for (BaseWork baseWork : sparkWork.getAllWork()) { if (baseWork instanceof MapWork) { - convertMapWork((MapWork) baseWork, false); - } else if (baseWork instanceof ReduceWork - && HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED)) { - convertReduceWork((ReduceWork) baseWork, false); + convertMapWork((MapWork) baseWork, true); + } else if (baseWork instanceof ReduceWork) { + ReduceWork reduceWork = (ReduceWork) baseWork; + + // Always set the EXPLAIN conditions. + setReduceWorkExplainConditions(reduceWork); + + if (isReduceVectorizationEnabled) { + convertReduceWork(reduceWork); + } } } } + return null; } - private void convertMapWork(MapWork mapWork, boolean isTez) throws SemanticException { + private void convertMapWork(MapWork mapWork, boolean isTezOrSpark) throws SemanticException { + + mapWork.setVectorizationExamined(true); + + // Global used when setting errors, etc. + currentBaseWork = mapWork; + VectorTaskColumnInfo vectorTaskColumnInfo = new VectorTaskColumnInfo(); - boolean ret = validateMapWork(mapWork, vectorTaskColumnInfo, isTez); + vectorTaskColumnInfo.assume(); + + boolean ret; + try { + ret = validateMapWork(mapWork, vectorTaskColumnInfo, isTezOrSpark); + } catch (Exception e) { + String issue = "exception: " + VectorizationContext.getStackTraceAsSingleLine(e); + setNodeIssue(issue); + ret = false; + } if (ret) { - vectorizeMapWork(mapWork, vectorTaskColumnInfo, isTez); + vectorizeMapWork(mapWork, vectorTaskColumnInfo, isTezOrSpark); + } else if (currentBaseWork.getVectorizationEnabled()) { + VectorizerReason notVectorizedReason = currentBaseWork.getNotVectorizedReason(); + if (notVectorizedReason == null) { + LOG.info("Cannot vectorize: unknown"); + } else { + LOG.info("Cannot vectorize: " + notVectorizedReason.toString()); + } + clearMapWorkVectorDescs(mapWork); } } @@ -510,6 +642,7 @@ private void addMapWorkRules(Map<Rule, NodeProcessor> opRules, NodeProcessor np) LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork = mapWork.getAliasToWork(); if ((aliasToWork == null) || (aliasToWork.size() == 0)) { + setNodeIssue("Vectorized map work requires work"); return null; } int tableScanCount = 0; @@ -518,7 +651,7 @@ private void addMapWorkRules(Map<Rule, NodeProcessor> opRules, NodeProcessor np) for (Entry<String, Operator<? extends OperatorDesc>> entry : aliasToWork.entrySet()) { Operator<? extends OperatorDesc> op = entry.getValue(); if (op == null) { - LOG.warn("Map work has invalid aliases to work with. 
Fail validation!"); + setNodeIssue("Vectorized map work requires a valid alias"); return null; } if (op instanceof TableScanOperator) { @@ -528,7 +661,7 @@ private void addMapWorkRules(Map opRules, NodeProcessor np) } } if (tableScanCount > 1) { - LOG.warn("Map work has more than 1 TableScanOperator. Fail validation!"); + setNodeIssue("Vectorized map work only works with 1 TableScanOperator"); return null; } return new ImmutablePair(alias, tableScanOperator); @@ -569,22 +702,6 @@ private void determineDataColumnNums(TableScanOperator tableScanOperator, } } - private String getHiveOptionsString() { - StringBuilder sb = new StringBuilder(); - sb.append(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname); - sb.append("="); - sb.append(useVectorizedInputFileFormat); - sb.append(", "); - sb.append(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE.varname); - sb.append("="); - sb.append(useVectorDeserialize); - sb.append(", and "); - sb.append(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_ROW_DESERIALIZE.varname); - sb.append("="); - sb.append(useRowDeserialize); - return sb.toString(); - } - /* * There are 3 modes of reading for vectorization: * @@ -599,44 +716,58 @@ private String getHiveOptionsString() { * the row object into the VectorizedRowBatch with VectorAssignRow. * This picks up Input File Format not supported by the other two. */ - private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable) { + private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable, + HashSet inputFileFormatClassNameSet, HashSet enabledConditionsMetSet, + ArrayList enabledConditionsNotMetList) { String inputFileFormatClassName = pd.getInputFileFormatClassName(); + // Always collect input file formats. + inputFileFormatClassNameSet.add(inputFileFormatClassName); + + boolean isInputFileFormatVectorized = Utilities.isInputFileFormatVectorized(pd); + + if (isAcidTable) { + + // Today, ACID tables are only ORC and that format is vectorizable. Verify these + // assumptions. + Preconditions.checkState(isInputFileFormatVectorized); + Preconditions.checkState(inputFileFormatClassName.equals(OrcInputFormat.class.getName())); + + if (!useVectorizedInputFileFormat) { + enabledConditionsNotMetList.add( + "Vectorizing ACID tables requires " + HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname); + return false; + } + + pd.setVectorPartitionDesc( + VectorPartitionDesc.createVectorizedInputFileFormat( + inputFileFormatClassName, Utilities.isInputFileFormatSelfDescribing(pd))); + + enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname); + return true; + } + // Look for Pass-Thru case where InputFileFormat has VectorizedInputFormatInterface // and reads VectorizedRowBatch as a "row". - if (isAcidTable || useVectorizedInputFileFormat) { + if (useVectorizedInputFileFormat) { - if (Utilities.isInputFileFormatVectorized(pd)) { - - if (!useVectorizedInputFileFormat) { - LOG.info("ACID tables con only be vectorized for the input file format -- " + - "i.e. 
when Hive Configuration option " + - HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname + - "=true"); - return false; - } + if (isInputFileFormatVectorized) { pd.setVectorPartitionDesc( VectorPartitionDesc.createVectorizedInputFileFormat( inputFileFormatClassName, Utilities.isInputFileFormatSelfDescribing(pd))); + enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname); return true; } - - // Today, ACID tables are only ORC and that format is vectorizable. Verify this - // assumption. - Preconditions.checkState(!isAcidTable); + // Fall through and look for other options... } - if (!(isSchemaEvolution || isAcidTable) && - (useVectorDeserialize || useRowDeserialize)) { - LOG.info("Input format: " + inputFileFormatClassName + " cannot be vectorized" + - " when both " + HiveConf.ConfVars.HIVE_SCHEMA_EVOLUTION.varname + "=false and " + - " ACID table is " + isAcidTable + " and " + - " given the Hive Configuration options " + getHiveOptionsString()); - return false; + if (!isSchemaEvolution) { + enabledConditionsNotMetList.add( + "Vectorizing tables without Schema Evolution requires " + HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname); } String deserializerClassName = pd.getDeserializerClassName(); @@ -646,6 +777,12 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable // // Do the "vectorized" row-by-row deserialization into a VectorizedRowBatch in the // VectorMapOperator. + boolean isTextFormat = inputFileFormatClassName.equals(TextInputFormat.class.getName()) && + deserializerClassName.equals(LazySimpleSerDe.class.getName()); + boolean isSequenceFormat = + inputFileFormatClassName.equals(SequenceFileInputFormat.class.getName()) && + deserializerClassName.equals(LazyBinarySerDe.class.getName()); + boolean isVectorDeserializeEligable = isTextFormat || isSequenceFormat; if (useVectorDeserialize) { @@ -659,8 +796,7 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable // org.apache.hadoop.mapred.SequenceFileInputFormat // org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - if (inputFileFormatClassName.equals(TextInputFormat.class.getName()) && - deserializerClassName.equals(LazySimpleSerDe.class.getName())) { + if (isTextFormat) { Properties properties = pd.getTableDesc().getProperties(); String lastColumnTakesRestString = @@ -670,10 +806,11 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable lastColumnTakesRestString.equalsIgnoreCase("true")); if (lastColumnTakesRest) { - // If row mode will not catch this, then inform. + // If row mode will not catch this input file format, then not enabled. 
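The eligibility test introduced above pairs each fast deserializer with its input format. A minimal sketch of the same predicate, using the class names the comments in this method already list (the helper name is an illustrative stand-in):

    // Sketch: vector deserialize only handles the two (InputFormat, SerDe)
    // pairings with fast batch deserializers; everything else falls through
    // to the generic row-deserialize path.
    final class VectorDeserializeEligibility {
      static boolean isEligible(String inputFormat, String serde) {
        boolean text = inputFormat.equals("org.apache.hadoop.mapred.TextInputFormat")
            && serde.equals("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
        boolean sequence = inputFormat.equals("org.apache.hadoop.mapred.SequenceFileInputFormat")
            && serde.equals("org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe");
        return text || sequence;
      }
    }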
           if (useRowDeserialize) {
-            LOG.info("Input format: " + inputFileFormatClassName + " cannot be vectorized" +
-                " when " + serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST + "is true");
+            enabledConditionsNotMetList.add(
+                inputFileFormatClassName + " " +
+                serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST + " must be disabled");
             return false;
           }
         } else {
@@ -681,17 +818,19 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable
               VectorPartitionDesc.createVectorDeserialize(
                   inputFileFormatClassName, VectorDeserializeType.LAZY_SIMPLE));

+          enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE.varname);
           return true;
         }
-      } else if (inputFileFormatClassName.equals(SequenceFileInputFormat.class.getName()) &&
-          deserializerClassName.equals(LazyBinarySerDe.class.getName())) {
+      } else if (isSequenceFormat) {

         pd.setVectorPartitionDesc(
             VectorPartitionDesc.createVectorDeserialize(
                 inputFileFormatClassName, VectorDeserializeType.LAZY_BINARY));

+        enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE.varname);
         return true;
       }
+      // Fall through and look for other options...
     }

     // Otherwise, if enabled, deserialize rows using regular Serde and add the object
@@ -705,17 +844,29 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable
           Utilities.isInputFileFormatSelfDescribing(pd),
           deserializerClassName));

+      enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_ROW_DESERIALIZE.varname);
       return true;
     }

-    LOG.info("Input format: " + inputFileFormatClassName + " cannot be vectorized" +
-        " given the Hive Configuration options " + getHiveOptionsString());
-
+    if (isInputFileFormatVectorized) {
+      Preconditions.checkState(!useVectorizedInputFileFormat);
+      enabledConditionsNotMetList.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname);
+    } else {
+      // Only offer these when the input file format is not one of the fast vectorized formats.
+      if (isVectorDeserializeEligible) {
+        Preconditions.checkState(!useVectorDeserialize);
+        enabledConditionsNotMetList.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE.varname);
+      } else {
+        // Row mode takes everything else.
+        enabledConditionsNotMetList.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_ROW_DESERIALIZE.varname);
+      }
+    }
+
     return false;
   }

-  private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String alias,
+  private ImmutablePair<Boolean, Boolean> validateInputFormatAndSchemaEvolution(MapWork mapWork, String alias,
       TableScanOperator tableScanOperator, VectorTaskColumnInfo vectorTaskColumnInfo)
           throws SemanticException {
@@ -743,27 +894,40 @@ private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String al
     LinkedHashMap<Path, ArrayList<String>> pathToAliases = mapWork.getPathToAliases();
     LinkedHashMap<Path, PartitionDesc> pathToPartitionInfo = mapWork.getPathToPartitionInfo();
+
+    // Remember the input file formats we validated and why.
+    HashSet<String> inputFileFormatClassNameSet = new HashSet<String>();
+    HashSet<String> enabledConditionsMetSet = new HashSet<String>();
+    ArrayList<String> enabledConditionsNotMetList = new ArrayList<String>();
+
     for (Entry<Path, ArrayList<String>> entry: pathToAliases.entrySet()) {
       Path path = entry.getKey();
       List<String> aliases = entry.getValue();
       boolean isPresent = (aliases != null && aliases.indexOf(alias) != -1);
       if (!isPresent) {
-        LOG.info("Alias " + alias + " not present in aliases " + aliases);
-        return false;
+        setOperatorIssue("Alias " + alias + " not present in aliases " + aliases);
+        return new ImmutablePair<Boolean, Boolean>(false, false);
       }
       PartitionDesc partDesc = pathToPartitionInfo.get(path);
       if (partDesc.getVectorPartitionDesc() != null) {
-        // We seen this already.
+        // We've seen this already.
        continue;
       }
-      if (!verifyAndSetVectorPartDesc(partDesc, isAcidTable)) {
-        return false;
+      if (!verifyAndSetVectorPartDesc(partDesc, isAcidTable, inputFileFormatClassNameSet,
+          enabledConditionsMetSet, enabledConditionsNotMetList)) {
+
+        // Always set these so EXPLAIN can see.
+        mapWork.setVectorizationInputFileFormatClassNameSet(inputFileFormatClassNameSet);
+        mapWork.setVectorizationEnabledConditionsMet(new ArrayList<String>(enabledConditionsMetSet));
+        mapWork.setVectorizationEnabledConditionsNotMet(enabledConditionsNotMetList);
+
+        // We consider this an enable issue, not a not vectorized issue.
+        LOG.info("Cannot enable vectorization because input file format(s) " + inputFileFormatClassNameSet +
+            " do not meet conditions " + VectorizationCondition.addBooleans(enabledConditionsNotMetList, false));
+        return new ImmutablePair<Boolean, Boolean>(false, true);
       }
+
       VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc();
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("Vectorizer path: " + path + ", " + vectorPartDesc.toString() +
-            ", aliases " + aliases);
-      }

       if (isFirst) {
@@ -807,13 +971,13 @@ private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String al
        * implicitly defaulted to null.
        */
       if (nextDataColumnList.size() > tableDataColumnList.size()) {
-        LOG.info(
+        setOperatorIssue(
             String.format(
                 "Could not vectorize partition %s " +
                 "(deserializer " + deserializer.getClass().getName() + ")" +
                 "The partition column names %d is greater than the number of table columns %d",
                 path, nextDataColumnList.size(), tableDataColumnList.size()));
-        return false;
+        return new ImmutablePair<Boolean, Boolean>(false, false);
       }
       if (!(deserializer instanceof NullStructSerDe)) {
@@ -822,13 +986,13 @@ private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String al
           String nextColumnName = nextDataColumnList.get(i);
           String tableColumnName = tableDataColumnList.get(i);
           if (!nextColumnName.equals(tableColumnName)) {
-            LOG.info(
+            setOperatorIssue(
                 String.format(
                     "Could not vectorize partition %s " +
                     "(deserializer " + deserializer.getClass().getName() + ")" +
                     "The partition column name %s is does not match table column name %s",
                     path, nextColumnName, tableColumnName));
-            return false;
+            return new ImmutablePair<Boolean, Boolean>(false, false);
           }
         }
       }
@@ -863,29 +1027,50 @@ private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String al
     // Helps to keep this for debugging.
     vectorTaskColumnInfo.setTableScanOperator(tableScanOperator);

-    return true;
+    // Always set these so EXPLAIN can see.
+    mapWork.setVectorizationInputFileFormatClassNameSet(inputFileFormatClassNameSet);
+    mapWork.setVectorizationEnabledConditionsMet(new ArrayList<String>(enabledConditionsMetSet));
+    mapWork.setVectorizationEnabledConditionsNotMet(enabledConditionsNotMetList);
+
+    return new ImmutablePair<Boolean, Boolean>(true, false);
   }

-  private boolean validateMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTaskColumnInfo, boolean isTez)
+  private boolean validateMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTaskColumnInfo, boolean isTezOrSpark)
       throws SemanticException {

     LOG.info("Validating MapWork...");

-    ImmutablePair<String, TableScanOperator> pair = verifyOnlyOneTableScanOperator(mapWork);
-    if (pair == null) {
+    ImmutablePair<String, TableScanOperator> onlyOneTableScanPair = verifyOnlyOneTableScanOperator(mapWork);
+    if (onlyOneTableScanPair == null) {
+      VectorizerReason notVectorizedReason = currentBaseWork.getNotVectorizedReason();
+      Preconditions.checkState(notVectorizedReason != null);
+      mapWork.setVectorizationEnabledConditionsNotMet(Arrays.asList(new String[] {notVectorizedReason.toString()}));
       return false;
     }
-    String alias = pair.left;
-    TableScanOperator tableScanOperator = pair.right;
+    String alias = onlyOneTableScanPair.left;
+    TableScanOperator tableScanOperator = onlyOneTableScanPair.right;

     // This call fills in the column names, types, and partition column count in
     // vectorTaskColumnInfo.
-    if (!validateInputFormatAndSchemaEvolution(mapWork, alias, tableScanOperator, vectorTaskColumnInfo)) {
+    currentOperator = tableScanOperator;
+    ImmutablePair<Boolean, Boolean> validateInputFormatAndSchemaEvolutionPair =
+        validateInputFormatAndSchemaEvolution(mapWork, alias, tableScanOperator, vectorTaskColumnInfo);
+    if (!validateInputFormatAndSchemaEvolutionPair.left) {
+      // Have we already set the enabled conditions not met?
+      if (!validateInputFormatAndSchemaEvolutionPair.right) {
+        VectorizerReason notVectorizedReason = currentBaseWork.getNotVectorizedReason();
+        Preconditions.checkState(notVectorizedReason != null);
+        mapWork.setVectorizationEnabledConditionsNotMet(Arrays.asList(new String[] {notVectorizedReason.toString()}));
+      }
       return false;
     }

+    // Now we are enabled and any issues found from here on out are considered
+    // not vectorized issues.
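The two booleans in the ImmutablePair returned above encode that enabled/not-vectorized split: left is "validation passed", right is "the enabled-conditions lists were already stored on the MapWork". A minimal sketch of how a caller decodes it (the interface is a stand-in for MapWork, and the helper name is invented for illustration):

    import java.util.Arrays;
    import java.util.List;

    interface EnablementSink {
      void setVectorizationEnabledConditionsNotMet(List<String> reasons);
    }

    final class ValidationResultSketch {
      // Returns whether vectorization may proceed; when the callee has not
      // already recorded enablement conditions, the single not-vectorized
      // reason is surfaced for EXPLAIN, mirroring validateMapWork above.
      static boolean handle(boolean validated, boolean conditionsAlreadySet,
          EnablementSink mapWork, String notVectorizedReason) {
        if (!validated && !conditionsAlreadySet) {
          mapWork.setVectorizationEnabledConditionsNotMet(
              Arrays.asList(notVectorizedReason));
        }
        return validated;
      }
    }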
+    mapWork.setVectorizationEnabled(true);
+
     Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
-    MapWorkValidationNodeProcessor vnp = new MapWorkValidationNodeProcessor(mapWork, isTez);
+    MapWorkValidationNodeProcessor vnp = new MapWorkValidationNodeProcessor(mapWork, isTezOrSpark);
     addMapWorkRules(opRules, vnp);
     Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
     GraphWalker ogw = new DefaultGraphWalker(disp);
@@ -907,13 +1092,13 @@ private boolean validateMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTask
   }

   private void vectorizeMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTaskColumnInfo,
-      boolean isTez) throws SemanticException {
+      boolean isTezOrSpark) throws SemanticException {

     LOG.info("Vectorizing MapWork...");
     mapWork.setVectorMode(true);
     Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
     MapWorkVectorizationNodeProcessor vnp =
-        new MapWorkVectorizationNodeProcessor(mapWork, isTez, vectorTaskColumnInfo);
+        new MapWorkVectorizationNodeProcessor(mapWork, isTezOrSpark, vectorTaskColumnInfo);
     addMapWorkRules(opRules, vnp);
     Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
     GraphWalker ogw = new PreOrderOnceWalker(disp);
@@ -934,11 +1119,42 @@ private void vectorizeMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTaskCo
     return;
   }

-  private void convertReduceWork(ReduceWork reduceWork, boolean isTez) throws SemanticException {
+  private void setReduceWorkExplainConditions(ReduceWork reduceWork) {
+
+    reduceWork.setVectorizationExamined(true);
+
+    reduceWork.setReduceVectorizationEnabled(isReduceVectorizationEnabled);
+    reduceWork.setVectorReduceEngine(
+        HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE));
+  }
+
+  private void convertReduceWork(ReduceWork reduceWork) throws SemanticException {
+
+    // Global used when setting errors, etc.
+    currentBaseWork = reduceWork;
+    currentBaseWork.setVectorizationEnabled(true);
+
     VectorTaskColumnInfo vectorTaskColumnInfo = new VectorTaskColumnInfo();
-    boolean ret = validateReduceWork(reduceWork, vectorTaskColumnInfo, isTez);
+    vectorTaskColumnInfo.assume();
+
+    boolean ret;
+    try {
+      ret = validateReduceWork(reduceWork, vectorTaskColumnInfo);
+    } catch (Exception e) {
+      String issue = "exception: " + VectorizationContext.getStackTraceAsSingleLine(e);
+      setNodeIssue(issue);
+      ret = false;
+    }
     if (ret) {
-      vectorizeReduceWork(reduceWork, vectorTaskColumnInfo, isTez);
+      vectorizeReduceWork(reduceWork, vectorTaskColumnInfo);
+    } else if (currentBaseWork.getVectorizationEnabled()) {
+      VectorizerReason notVectorizedReason = currentBaseWork.getNotVectorizedReason();
+      if (notVectorizedReason == null) {
+        LOG.info("Cannot vectorize: unknown");
+      } else {
+        LOG.info("Cannot vectorize: " + notVectorizedReason.toString());
+      }
+      clearReduceWorkVectorDescs(reduceWork);
     }
   }
@@ -952,13 +1168,14 @@ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork,
     // Check key ObjectInspector.
     ObjectInspector keyObjectInspector = reduceWork.getKeyObjectInspector();
     if (keyObjectInspector == null || !(keyObjectInspector instanceof StructObjectInspector)) {
+      setNodeIssue("Key object inspector missing or not StructObjectInspector");
       return false;
     }
     StructObjectInspector keyStructObjectInspector = (StructObjectInspector)keyObjectInspector;
     List<? extends StructField> keyFields = keyStructObjectInspector.getAllStructFieldRefs();

-    // Tez doesn't use tagging...
     if (reduceWork.getNeedsTagging()) {
+      setNodeIssue("Tez doesn't use tagging");
       return false;
     }
@@ -966,6 +1183,7 @@
     ObjectInspector valueObjectInspector = reduceWork.getValueObjectInspector();
     if (valueObjectInspector == null ||
        !(valueObjectInspector instanceof StructObjectInspector)) {
+      setNodeIssue("Value object inspector missing or not StructObjectInspector");
       return false;
     }
     StructObjectInspector valueStructObjectInspector = (StructObjectInspector)valueObjectInspector;
@@ -995,7 +1213,7 @@
   }

   private boolean validateReduceWork(ReduceWork reduceWork,
-      VectorTaskColumnInfo vectorTaskColumnInfo, boolean isTez) throws SemanticException {
+      VectorTaskColumnInfo vectorTaskColumnInfo) throws SemanticException {

     LOG.info("Validating ReduceWork...");
@@ -1026,7 +1244,7 @@ private boolean validateReduceWork(ReduceWork reduceWork,
   }

   private void vectorizeReduceWork(ReduceWork reduceWork,
-      VectorTaskColumnInfo vectorTaskColumnInfo, boolean isTez) throws SemanticException {
+      VectorTaskColumnInfo vectorTaskColumnInfo) throws SemanticException {

     LOG.info("Vectorizing ReduceWork...");
     reduceWork.setVectorMode(true);
@@ -1036,7 +1254,7 @@ private void vectorizeReduceWork(ReduceWork reduceWork,
     // VectorizationContext...  Do we use PreOrderWalker instead of DefaultGraphWalker.
     Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
     ReduceWorkVectorizationNodeProcessor vnp =
-        new ReduceWorkVectorizationNodeProcessor(vectorTaskColumnInfo, isTez);
+        new ReduceWorkVectorizationNodeProcessor(vectorTaskColumnInfo);
     addReduceWorkRules(opRules, vnp);
     Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
     GraphWalker ogw = new PreOrderWalker(disp);
@@ -1059,12 +1277,55 @@ private void vectorizeReduceWork(ReduceWork reduceWork,
       debugDisplayAllMaps(reduceWork);
     }
   }
+
+  class ClearVectorDescsNodeProcessor implements NodeProcessor {
+
+    public ClearVectorDescsNodeProcessor() {
+    }
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      for (Node n : stack) {
+        Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) n;
+
+        OperatorDesc desc = op.getConf();
+        if (desc instanceof AbstractOperatorDesc) {
+          AbstractOperatorDesc abstractDesc = (AbstractOperatorDesc) desc;
+          abstractDesc.setVectorDesc(null);
+        }
+      }
+      return null;
+    }
+  }
+
+  private void clearMapWorkVectorDescs(MapWork mapWork) throws SemanticException {
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+    ClearVectorDescsNodeProcessor vnp = new ClearVectorDescsNodeProcessor();
+    addMapWorkRules(opRules, vnp);
+    Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
+    GraphWalker ogw = new DefaultGraphWalker(disp);
+    ArrayList<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(mapWork.getAliasToWork().values());
+    ogw.startWalking(topNodes, null);
+  }
+
+  private void clearReduceWorkVectorDescs(ReduceWork reduceWork) throws SemanticException {
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+    ClearVectorDescsNodeProcessor vnp = new ClearVectorDescsNodeProcessor();
+    addReduceWorkRules(opRules, vnp);
+    Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
+    GraphWalker ogw = new DefaultGraphWalker(disp);
+    ArrayList<Node> topNodes = new ArrayList<Node>();
+    topNodes.add(reduceWork.getReducer());
+    ogw.startWalking(topNodes, null);
+  }
 }

 class MapWorkValidationNodeProcessor implements NodeProcessor {

   private final MapWork mapWork;
-  private final boolean isTez;
+  private final boolean isTezOrSpark;

   // Children of Vectorized GROUPBY that outputs rows instead of vectorized row batches.
   protected final Set<Operator<? extends OperatorDesc>> nonVectorizedOps =
@@ -1074,9 +1335,9 @@
     return nonVectorizedOps;
   }

-  public MapWorkValidationNodeProcessor(MapWork mapWork, boolean isTez) {
+  public MapWorkValidationNodeProcessor(MapWork mapWork, boolean isTezOrSpark) {
     this.mapWork = mapWork;
-    this.isTez = isTez;
+    this.isTezOrSpark = isTezOrSpark;
   }

   @Override
@@ -1088,13 +1349,13 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
       return new Boolean(true);
     }
     boolean ret;
+    currentOperator = op;
     try {
-      ret = validateMapWorkOperator(op, mapWork, isTez);
+      ret = validateMapWorkOperator(op, mapWork, isTezOrSpark);
     } catch (Exception e) {
       throw new SemanticException(e);
     }
     if (!ret) {
-      LOG.info("MapWork Operator: " + op.getName() + " could not be vectorized.");
       return new Boolean(false);
     }
     // When Vectorized GROUPBY outputs rows instead of vectorized row batches, we don't
@@ -1130,9 +1391,9 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
     if (nonVectorizedOps.contains(op)) {
       return new Boolean(true);
     }
+    currentOperator = op;
     boolean ret = validateReduceWorkOperator(op);
     if (!ret) {
-      LOG.info("ReduceWork Operator: " + op.getName() + " could not be vectorized.");
       return new Boolean(false);
     }
     // When Vectorized GROUPBY outputs rows instead of vectorized row batches, we don't
@@ -1153,9 +1414,12 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
   // The vectorization context for the Map or Reduce task.
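The ClearVectorDescsNodeProcessor above exists because a failed validation can leave earlier operators already annotated; wiping every vector desc keeps the plan (and EXPLAIN) from advertising a half-vectorized state. A minimal recursive sketch of the same cleanup, with a stand-in interface instead of Hive's Operator and graph walker:

    import java.util.List;

    interface OperatorLike {
      void setVectorDesc(Object vectorDesc);
      List<OperatorLike> getChildOperators();
    }

    final class ClearDescsSketch {
      // Null out vector descs over the whole operator subtree; the real code
      // drives this with a rule-based graph walker rather than recursion.
      static void clear(OperatorLike op) {
        op.setVectorDesc(null);   // idempotent, safe to revisit a node
        for (OperatorLike child : op.getChildOperators()) {
          clear(child);
        }
      }
    }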
   protected VectorizationContext taskVectorizationContext;

+  protected final VectorTaskColumnInfo vectorTaskColumnInfo;
   protected final Set<Operator<? extends OperatorDesc>> nonVectorizedOps;

-  VectorizationNodeProcessor(Set<Operator<? extends OperatorDesc>> nonVectorizedOps) {
+  VectorizationNodeProcessor(VectorTaskColumnInfo vectorTaskColumnInfo,
+      Set<Operator<? extends OperatorDesc>> nonVectorizedOps) {
+    this.vectorTaskColumnInfo = vectorTaskColumnInfo;
     this.nonVectorizedOps = nonVectorizedOps;
   }
@@ -1203,11 +1467,11 @@ public VectorizationContext walkStackToFindVectorizationContext(Stack stac
   }

   public Operator<? extends OperatorDesc> doVectorize(Operator<? extends OperatorDesc> op,
-      VectorizationContext vContext, boolean isTez) throws SemanticException {
+      VectorizationContext vContext, boolean isTezOrSpark) throws SemanticException {
     Operator<? extends OperatorDesc> vectorOp = op;
     try {
       if (!opsDone.contains(op)) {
-        vectorOp = vectorizeOperator(op, vContext, isTez);
+        vectorOp = vectorizeOperator(op, vContext, isTezOrSpark, vectorTaskColumnInfo);
         opsDone.add(op);
         if (vectorOp != op) {
           opToVectorOpMap.put(op, vectorOp);
@@ -1231,14 +1495,14 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
     private final MapWork mWork;
     private final VectorTaskColumnInfo vectorTaskColumnInfo;
-    private final boolean isTez;
+    private final boolean isTezOrSpark;

-    public MapWorkVectorizationNodeProcessor(MapWork mWork, boolean isTez,
+    public MapWorkVectorizationNodeProcessor(MapWork mWork, boolean isTezOrSpark,
         VectorTaskColumnInfo vectorTaskColumnInfo) {
-      super(vectorTaskColumnInfo.getNonVectorizedOps());
+      super(vectorTaskColumnInfo, vectorTaskColumnInfo.getNonVectorizedOps());
       this.mWork = mWork;
       this.vectorTaskColumnInfo = vectorTaskColumnInfo;
-      this.isTez = isTez;
+      this.isTezOrSpark = isTezOrSpark;
     }

     @Override
@@ -1252,6 +1516,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,

       VectorizationContext vContext = null;

+      currentOperator = op;
       if (op instanceof TableScanOperator) {
         if (taskVectorizationContext == null) {
           taskVectorizationContext = getVectorizationContext(op.getName(), vectorTaskColumnInfo);
@@ -1272,7 +1537,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
             + " using vectorization context" + vContext.toString());
       }

-      Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext, isTez);
+      Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext, isTezOrSpark);

       if (LOG.isDebugEnabled()) {
         if (vectorOp instanceof VectorizationContextRegion) {
@@ -1290,7 +1555,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,

     private final VectorTaskColumnInfo vectorTaskColumnInfo;

-    private final boolean isTez;

     private Operator<? extends OperatorDesc> rootVectorOp;
@@ -1298,13 +1562,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
       return rootVectorOp;
     }

-    public ReduceWorkVectorizationNodeProcessor(VectorTaskColumnInfo vectorTaskColumnInfo,
-        boolean isTez) {
+    public ReduceWorkVectorizationNodeProcessor(VectorTaskColumnInfo vectorTaskColumnInfo) {

-      super(vectorTaskColumnInfo.getNonVectorizedOps());
+      super(vectorTaskColumnInfo, vectorTaskColumnInfo.getNonVectorizedOps());
       this.vectorTaskColumnInfo = vectorTaskColumnInfo;
       rootVectorOp = null;
-      this.isTez = isTez;
     }

     @Override
@@ -1320,6 +1582,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,

       boolean saveRootVectorOp = false;

+      currentOperator = op;
       if (op.getParentOperators().size() == 0) {
         LOG.info("ReduceWorkVectorizationNodeProcessor process reduceColumnNames " +
             vectorTaskColumnInfo.allColumnNames.toString());
@@ -1344,7 +1607,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
       assert vContext != null;
       LOG.info("ReduceWorkVectorizationNodeProcessor process operator " + op.getName() +
           " using vectorization context" + vContext.toString());

-      Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext, isTez);
+      Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext, true);

       if (LOG.isDebugEnabled()) {
         if (vectorOp instanceof VectorizationContextRegion) {
@@ -1404,6 +1667,10 @@ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticE
     // is enabled and we are going to run in LLAP. However, we don't know if we end up in
     // LLAP or not at this stage, so don't do this now. We may need to add a 'force' option.

+    isReduceVectorizationEnabled =
+        HiveConf.getBoolVar(hiveConf,
+            HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED);
+
     isSchemaEvolution =
         HiveConf.getBoolVar(hiveConf,
             HiveConf.ConfVars.HIVE_SCHEMA_EVOLUTION);
@@ -1423,18 +1690,32 @@ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticE
     return physicalContext;
   }

-  boolean validateMapWorkOperator(Operator<? extends OperatorDesc> op, MapWork mWork, boolean isTez) {
-    boolean ret = false;
+  private void setOperatorNotSupported(Operator<? extends OperatorDesc> op) {
+    OperatorDesc desc = op.getConf();
+    Annotation note = AnnotationUtils.getAnnotation(desc.getClass(), Explain.class);
+    if (note != null) {
+      Explain explainNote = (Explain) note;
+      setNodeIssue(explainNote.displayName() + " (" + op.getType() + ") not supported");
+    } else {
+      setNodeIssue("Operator " + op.getType() + " not supported");
+    }
+  }
+
+  boolean validateMapWorkOperator(Operator<? extends OperatorDesc> op, MapWork mWork, boolean isTezOrSpark) {
+    boolean ret;
     switch (op.getType()) {
     case MAPJOIN:
       if (op instanceof MapJoinOperator) {
         ret = validateMapJoinOperator((MapJoinOperator) op);
       } else if (op instanceof SMBMapJoinOperator) {
         ret = validateSMBMapJoinOperator((SMBMapJoinOperator) op);
+      } else {
+        setOperatorNotSupported(op);
+        ret = false;
       }
       break;
     case GROUPBY:
-      ret = validateGroupByOperator((GroupByOperator) op, false, isTez);
+      ret = validateGroupByOperator((GroupByOperator) op, false, isTezOrSpark);
       break;
     case FILTER:
       ret = validateFilterOperator((FilterOperator) op);
@@ -1459,6 +1740,7 @@ boolean validateMapWorkOperator(Operator op, MapWork mWo
          validateSparkHashTableSinkOperator((SparkHashTableSinkOperator) op);
       break;
     default:
+      setOperatorNotSupported(op);
       ret = false;
       break;
     }
@@ -1466,7 +1748,7 @@ boolean validateMapWorkOperator(Operator op, MapWork mWo
   }

   boolean validateReduceWorkOperator(Operator<? extends OperatorDesc> op) {
-    boolean ret = false;
+    boolean ret;
     switch (op.getType()) {
     case MAPJOIN:
      // Does MAPJOIN actually get planned in Reduce?
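The setOperatorNotSupported helper above pulls a user-facing display name from the desc class's Explain annotation when one is present. A self-contained sketch of the same lookup; ExplainLike is a stand-in for Hive's @Explain, and plain reflection replaces the AnnotationUtils call used in the patch:

    import java.lang.annotation.Retention;
    import java.lang.annotation.RetentionPolicy;

    @Retention(RetentionPolicy.RUNTIME)
    @interface ExplainLike { String displayName() default ""; }

    final class NotSupportedMessageSketch {
      // Prefer the annotation's display name; fall back to the raw operator type.
      static String message(Class<?> descClass, String opType) {
        ExplainLike note = descClass.getAnnotation(ExplainLike.class);
        return (note != null)
            ? note.displayName() + " (" + opType + ") not supported"
            : "Operator " + opType + " not supported";
      }
    }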
@@ -1474,6 +1756,9 @@ boolean validateReduceWorkOperator(Operator op) {
         ret = validateMapJoinOperator((MapJoinOperator) op);
       } else if (op instanceof SMBMapJoinOperator) {
         ret = validateSMBMapJoinOperator((SMBMapJoinOperator) op);
+      } else {
+        setOperatorNotSupported(op);
+        ret = false;
       }
       break;
     case GROUPBY:
@@ -1481,6 +1766,7 @@ boolean validateReduceWorkOperator(Operator op) {
           HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED)) {
         ret = validateGroupByOperator((GroupByOperator) op, true, true);
       } else {
+        setNodeIssue("Operator " + op.getType() + " not enabled (" + HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED.name() + "=true IS false)");
         ret = false;
       }
       break;
@@ -1506,6 +1792,7 @@ boolean validateReduceWorkOperator(Operator op) {
          validateSparkHashTableSinkOperator((SparkHashTableSinkOperator) op);
       break;
     default:
+      setOperatorNotSupported(op);
       ret = false;
       break;
     }
@@ -1528,7 +1815,7 @@ private Boolean isVectorizedGroupByThatOutputsRows(Operator
     List<ExprNodeDesc> filterExprs = desc.getFilters().get(posBigTable);
-    if (!validateExprNodeDesc(filterExprs, VectorExpressionDescriptor.Mode.FILTER)) {
-      LOG.info("Cannot vectorize map work filter expression");
+    if (!validateExprNodeDesc(filterExprs, "Filter", VectorExpressionDescriptor.Mode.FILTER)) {
       return false;
     }
     List<ExprNodeDesc> keyExprs = desc.getKeys().get(posBigTable);
-    if (!validateExprNodeDesc(keyExprs)) {
-      LOG.info("Cannot vectorize map work key expression");
+    if (!validateExprNodeDesc(keyExprs, "Key")) {
       return false;
     }
     List<ExprNodeDesc> valueExprs = desc.getExprs().get(posBigTable);
-    if (!validateExprNodeDesc(valueExprs)) {
-      LOG.info("Cannot vectorize map work value expression");
+    if (!validateExprNodeDesc(valueExprs, "Value")) {
       return false;
     }
     Byte[] order = desc.getTagOrder();
     Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]);
     List<ExprNodeDesc> smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable);
-    if (!validateExprNodeDesc(smallTableExprs)) {
-      LOG.info("Cannot vectorize map work small table expression");
+    if (!validateExprNodeDesc(smallTableExprs, "Small Table")) {
       return false;
     }
     if (desc.getResidualFilterExprs() != null && !desc.getResidualFilterExprs().isEmpty()) {
@@ -1591,24 +1875,23 @@ private boolean validateSparkHashTableSinkOperator(SparkHashTableSinkOperator op
     List<ExprNodeDesc> filterExprs = desc.getFilters().get(tag);
     List<ExprNodeDesc> keyExprs = desc.getKeys().get(tag);
     List<ExprNodeDesc> valueExprs = desc.getExprs().get(tag);
-    return validateExprNodeDesc(filterExprs, VectorExpressionDescriptor.Mode.FILTER) &&
-        validateExprNodeDesc(keyExprs) && validateExprNodeDesc(valueExprs);
+    return validateExprNodeDesc(filterExprs, "Filter", VectorExpressionDescriptor.Mode.FILTER) &&
+        validateExprNodeDesc(keyExprs, "Key") && validateExprNodeDesc(valueExprs, "Value");
   }

   private boolean validateReduceSinkOperator(ReduceSinkOperator op) {
     List<ExprNodeDesc> keyDescs = op.getConf().getKeyCols();
     List<ExprNodeDesc> partitionDescs = op.getConf().getPartitionCols();
     List<ExprNodeDesc> valueDesc = op.getConf().getValueCols();
-    return validateExprNodeDesc(keyDescs) && validateExprNodeDesc(partitionDescs) &&
-        validateExprNodeDesc(valueDesc);
+    return validateExprNodeDesc(keyDescs, "Key") && validateExprNodeDesc(partitionDescs, "Partition") &&
+        validateExprNodeDesc(valueDesc, "Value");
   }

   private boolean validateSelectOperator(SelectOperator op) {
     List<ExprNodeDesc> descList = op.getConf().getColList();
     for (ExprNodeDesc desc : descList) {
-      boolean ret = validateExprNodeDesc(desc);
+      boolean ret = validateExprNodeDesc(desc, "Select");
       if (!ret) {
-        LOG.info("Cannot vectorize select expression: " + desc.toString());
         return false;
       }
     }
@@ -1617,28 +1900,26 @@ private boolean validateSelectOperator(SelectOperator op) {

   private boolean validateFilterOperator(FilterOperator op) {
     ExprNodeDesc desc = op.getConf().getPredicate();
-    return validateExprNodeDesc(desc, VectorExpressionDescriptor.Mode.FILTER);
+    return validateExprNodeDesc(desc, "Predicate", VectorExpressionDescriptor.Mode.FILTER);
   }

-  private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, boolean isTez) {
+  private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, boolean isTezOrSpark) {
     GroupByDesc desc = op.getConf();
-    VectorGroupByDesc vectorDesc = desc.getVectorDesc();

     if (desc.isGroupingSetsPresent()) {
-      LOG.info("Grouping sets not supported in vector mode");
+      setOperatorIssue("Grouping sets not supported");
       return false;
     }
     if (desc.pruneGroupingSetId()) {
-      LOG.info("Pruning grouping set id not supported in vector mode");
+      setOperatorIssue("Pruning grouping set id not supported");
       return false;
     }
     if (desc.getMode() != GroupByDesc.Mode.HASH && desc.isDistinct()) {
-      LOG.info("DISTINCT not supported in vector mode");
+      setOperatorIssue("DISTINCT not supported");
      return false;
     }
-    boolean ret = validateExprNodeDesc(desc.getKeys());
+    boolean ret = validateExprNodeDesc(desc.getKeys(), "Key");
     if (!ret) {
-      LOG.info("Cannot vectorize groupby key expression " + desc.getKeys().toString());
       return false;
     }
@@ -1751,6 +2032,9 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, bo

     // If all the aggregation outputs are primitive, we can output VectorizedRowBatch.
     // Otherwise, the rest of the operator tree will be row mode.
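The comment above describes the decision recorded in the VectorGroupByDesc created just below: GROUP BY can emit vectorized batches only when every aggregate output category is PRIMITIVE. A minimal sketch of that check (the class name is an illustrative stand-in):

    import java.util.List;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

    final class GroupByOutputSketch {
      // True when GROUP BY may keep emitting VectorizedRowBatch; any complex
      // aggregate output forces the downstream operators into row mode.
      static boolean outputsBatches(List<ObjectInspector.Category> aggOutputCategories) {
        for (ObjectInspector.Category c : aggOutputCategories) {
          if (c != ObjectInspector.Category.PRIMITIVE) {
            return false;
          }
        }
        return true;
      }
    }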
+    VectorGroupByDesc vectorDesc = new VectorGroupByDesc();
+    desc.setVectorDesc(vectorDesc);
+    vectorDesc.setVectorOutput(retPair.right);

     vectorDesc.setProcessingMode(processingMode);
@@ -1765,14 +2049,15 @@ private boolean validateFileSinkOperator(FileSinkOperator op) {
     return true;
   }

-  private boolean validateExprNodeDesc(List<ExprNodeDesc> descs) {
-    return validateExprNodeDesc(descs, VectorExpressionDescriptor.Mode.PROJECTION);
+  private boolean validateExprNodeDesc(List<ExprNodeDesc> descs, String expressionTitle) {
+    return validateExprNodeDesc(descs, expressionTitle, VectorExpressionDescriptor.Mode.PROJECTION);
   }

   private boolean validateExprNodeDesc(List<ExprNodeDesc> descs,
+      String expressionTitle,
       VectorExpressionDescriptor.Mode mode) {
     for (ExprNodeDesc d : descs) {
-      boolean ret = validateExprNodeDesc(d, mode);
+      boolean ret = validateExprNodeDesc(d, expressionTitle, mode);
       if (!ret) {
         return false;
       }
@@ -1795,19 +2080,20 @@ private boolean validateExprNodeDesc(List descs,
     return new Pair<Boolean, Boolean>(true, outputIsPrimitive);
   }

-  private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressionDescriptor.Mode mode) {
+  private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressionTitle,
+      VectorExpressionDescriptor.Mode mode) {
     if (desc instanceof ExprNodeColumnDesc) {
       ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc;
       // Currently, we do not support vectorized virtual columns (see HIVE-5570).
       if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(c.getColumn())) {
-        LOG.info("Cannot vectorize virtual column " + c.getColumn());
+        setExpressionIssue(expressionTitle, "Virtual columns not supported (" + c.getColumn() + ")");
         return false;
       }
     }
     String typeName = desc.getTypeInfo().getTypeName();
     boolean ret = validateDataType(typeName, mode);
     if (!ret) {
-      LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName);
+      setExpressionIssue(expressionTitle, "Data type " + typeName + " of " + desc.toString() + " not supported");
       return false;
     }
     boolean isInExpression = false;
@@ -1815,7 +2101,7 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressio
       ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
       boolean r = validateGenericUdf(d);
       if (!r) {
-        LOG.info("Cannot vectorize UDF " + d);
+        setExpressionIssue(expressionTitle, "UDF " + d + " not supported");
         return false;
       }
       GenericUDF genericUDF = d.getGenericUDF();
@@ -1826,14 +2112,14 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressio
           && desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) {
         // Don't restrict child expressions for projection.
         // Always use loose FILTER mode.
-        if (!validateStructInExpression(desc, VectorExpressionDescriptor.Mode.FILTER)) {
+        if (!validateStructInExpression(desc, expressionTitle, VectorExpressionDescriptor.Mode.FILTER)) {
           return false;
         }
       } else {
         for (ExprNodeDesc d : desc.getChildren()) {
           // Don't restrict child expressions for projection.
           // Always use loose FILTER mode.
-          if (!validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER)) {
+          if (!validateExprNodeDescRecursive(d, expressionTitle, VectorExpressionDescriptor.Mode.FILTER)) {
             return false;
           }
         }
@@ -1843,7 +2129,7 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressio
   }

   private boolean validateStructInExpression(ExprNodeDesc desc,
-      VectorExpressionDescriptor.Mode mode) {
+      String expressionTitle, VectorExpressionDescriptor.Mode mode) {
     for (ExprNodeDesc d : desc.getChildren()) {
       TypeInfo typeInfo = d.getTypeInfo();
       if (typeInfo.getCategory() != Category.STRUCT) {
@@ -1859,7 +2145,8 @@ private boolean validateStructInExpression(ExprNodeDesc desc,
         TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
         Category category = fieldTypeInfo.getCategory();
         if (category != Category.PRIMITIVE) {
-          LOG.info("Cannot vectorize struct field " + fieldNames.get(f)
+          setExpressionIssue(expressionTitle,
+              "Cannot vectorize struct field " + fieldNames.get(f)
               + " of type " + fieldTypeInfo.getTypeName());
           return false;
         }
@@ -1872,7 +2159,8 @@ private boolean validateStructInExpression(ExprNodeDesc desc,
         if (inConstantType != InConstantType.INT_FAMILY &&
             inConstantType != InConstantType.FLOAT_FAMILY &&
             inConstantType != InConstantType.STRING_FAMILY) {
-          LOG.info("Cannot vectorize struct field " + fieldNames.get(f)
+          setExpressionIssue(expressionTitle,
+              "Cannot vectorize struct field " + fieldNames.get(f)
               + " of type " + fieldTypeInfo.getTypeName());
           return false;
         }
@@ -1881,31 +2169,28 @@ private boolean validateStructInExpression(ExprNodeDesc desc,
     return true;
   }

-  private boolean validateExprNodeDesc(ExprNodeDesc desc) {
-    return validateExprNodeDesc(desc, VectorExpressionDescriptor.Mode.PROJECTION);
+  private boolean validateExprNodeDesc(ExprNodeDesc desc, String expressionTitle) {
+    return validateExprNodeDesc(desc, expressionTitle, VectorExpressionDescriptor.Mode.PROJECTION);
   }

-  boolean validateExprNodeDesc(ExprNodeDesc desc, VectorExpressionDescriptor.Mode mode) {
-    if (!validateExprNodeDescRecursive(desc, mode)) {
+  boolean validateExprNodeDesc(ExprNodeDesc desc, String expressionTitle,
+      VectorExpressionDescriptor.Mode mode) {
+    if (!validateExprNodeDescRecursive(desc, expressionTitle, mode)) {
       return false;
     }
     try {
       VectorizationContext vc = new ValidatorVectorizationContext(hiveConf);
       if (vc.getVectorExpression(desc, mode) == null) {
         // TODO: this cannot happen - VectorizationContext throws in such cases.
-        LOG.info("getVectorExpression returned null");
+        setExpressionIssue(expressionTitle, "getVectorExpression returned null");
         return false;
       }
     } catch (Exception e) {
       if (e instanceof HiveException) {
-        LOG.info(e.getMessage());
+        setExpressionIssue(expressionTitle, e.getMessage());
       } else {
-        if (LOG.isDebugEnabled()) {
-          // Show stack trace.
-          LOG.debug("Failed to vectorize", e);
-        } else {
-          LOG.info("Failed to vectorize", e.getMessage());
-        }
+        String issue = "exception: " + VectorizationContext.getStackTraceAsSingleLine(e);
+        setExpressionIssue(expressionTitle, issue);
       }
       return false;
     }
@@ -1929,9 +2214,9 @@ private boolean validateGenericUdf(ExprNodeGenericFuncDesc genericUDFExpr) {
     return true;
   }

-  private boolean validateAggregationIsPrimitive(VectorAggregateExpression vectorAggrExpr) {
+  public static ObjectInspector.Category aggregationOutputCategory(VectorAggregateExpression vectorAggrExpr) {
     ObjectInspector outputObjInspector = vectorAggrExpr.getOutputObjectInspector();
-    return (outputObjInspector.getCategory() == ObjectInspector.Category.PRIMITIVE);
+    return outputObjInspector.getCategory();
   }

   private Pair<Boolean, Boolean> validateAggregationDesc(AggregationDesc aggDesc, ProcessingMode processingMode,
@@ -1939,11 +2224,10 @@ private boolean validateAggregationIsPrimitive(VectorAggregateExpression vectorA
     String udfName = aggDesc.getGenericUDAFName().toLowerCase();
     if (!supportedAggregationUdfs.contains(udfName)) {
-      LOG.info("Cannot vectorize groupby aggregate expression: UDF " + udfName + " not supported");
+      setExpressionIssue("Aggregation Function", "UDF " + udfName + " not supported");
       return new Pair<Boolean, Boolean>(false, false);
     }
-    if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters())) {
-      LOG.info("Cannot vectorize groupby aggregate expression: UDF parameters not supported");
+    if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters(), "Aggregation Function UDF " + udfName + " parameter")) {
       return new Pair<Boolean, Boolean>(false, false);
     }
@@ -1957,6 +2241,7 @@ private boolean validateAggregationIsPrimitive(VectorAggregateExpression vectorA
       if (LOG.isDebugEnabled()) {
         LOG.debug("Vectorization of aggregation should have succeeded ", e);
       }
+      setExpressionIssue("Aggregation Function", "Vectorization of aggregation should have succeeded " + e);
       return new Pair<Boolean, Boolean>(false, false);
     }
     if (LOG.isDebugEnabled()) {
@@ -1964,11 +2249,12 @@
           " vector expression " + vectorAggrExpr.toString());
     }

-    boolean outputIsPrimitive = validateAggregationIsPrimitive(vectorAggrExpr);
+    ObjectInspector.Category outputCategory = aggregationOutputCategory(vectorAggrExpr);
+    boolean outputIsPrimitive = (outputCategory == ObjectInspector.Category.PRIMITIVE);
     if (processingMode == ProcessingMode.MERGE_PARTIAL &&
         hasKeys &&
         !outputIsPrimitive) {
-      LOG.info("Vectorized Reduce MergePartial GROUP BY keys can only handle aggregate outputs that are primitive types");
+      setOperatorIssue("Vectorized Reduce MergePartial GROUP BY keys can only handle aggregate outputs that are primitive types");
       return new Pair<Boolean, Boolean>(false, false);
     }
@@ -2036,12 +2322,12 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) {
         if (smallTableIndices[i] < 0) {
           // Negative numbers indicate a column to be (deserialize) read from the small table's
           // LazyBinary value row.
-          LOG.info("Vectorizer isBigTableOnlyResults smallTableIndices[i] < 0 returning false");
+          setOperatorIssue("Vectorizer isBigTableOnlyResults smallTableIndices[i] < 0 returning false");
           return false;
         }
       }
     } else if (smallTableRetainSize > 0) {
-      LOG.info("Vectorizer isBigTableOnlyResults smallTableRetainSize > 0 returning false");
+      setOperatorIssue("Vectorizer isBigTableOnlyResults smallTableRetainSize > 0 returning false");
       return false;
     }
@@ -2050,20 +2336,21 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) {
   }

   Operator<? extends OperatorDesc> specializeMapJoinOperator(Operator<? extends OperatorDesc> op,
-      VectorizationContext vContext, MapJoinDesc desc) throws HiveException {
+      VectorizationContext vContext, MapJoinDesc desc, VectorMapJoinInfo vectorMapJoinInfo)
+      throws HiveException {
     Operator<? extends OperatorDesc> vectorOp = null;
     Class<? extends Operator<?>> opClass = null;

-    VectorMapJoinDesc.HashTableImplementationType hashTableImplementationType = HashTableImplementationType.NONE;
-    VectorMapJoinDesc.HashTableKind hashTableKind = HashTableKind.NONE;
-    VectorMapJoinDesc.HashTableKeyType hashTableKeyType = HashTableKeyType.NONE;
+    VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc();
+
+    HashTableImplementationType hashTableImplementationType = HashTableImplementationType.NONE;
+    HashTableKind hashTableKind = HashTableKind.NONE;
+    HashTableKeyType hashTableKeyType = HashTableKeyType.NONE;
+    OperatorVariation operatorVariation = OperatorVariation.NONE;

-    if (HiveConf.getBoolVar(hiveConf,
-        HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED)) {
+    if (vectorDesc.getIsFastHashTableEnabled()) {
       hashTableImplementationType = HashTableImplementationType.FAST;
     } else {
-      // Restrict to using BytesBytesMultiHashMap via MapJoinBytesTableContainer or
-      // HybridHashTableContainer.
       hashTableImplementationType = HashTableImplementationType.OPTIMIZED;
     }
@@ -2085,20 +2372,31 @@
     Map<Byte, List<ExprNodeDesc>> keyExprs = desc.getKeys();
     List<ExprNodeDesc> bigTableKeyExprs = keyExprs.get(posBigTable);
     if (bigTableKeyExprs.size() == 1) {
-      String typeName = bigTableKeyExprs.get(0).getTypeString();
-      LOG.info("Vectorizer vectorizeOperator map join typeName " + typeName);
-      if (typeName.equals("boolean")) {
+      TypeInfo typeInfo = bigTableKeyExprs.get(0).getTypeInfo();
+      LOG.info("Vectorizer vectorizeOperator map join typeName " + typeInfo.getTypeName());
+      switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) {
+      case BOOLEAN:
         hashTableKeyType = HashTableKeyType.BOOLEAN;
-      } else if (typeName.equals("tinyint")) {
+        break;
+      case BYTE:
         hashTableKeyType = HashTableKeyType.BYTE;
-      } else if (typeName.equals("smallint")) {
+        break;
+      case SHORT:
         hashTableKeyType = HashTableKeyType.SHORT;
-      } else if (typeName.equals("int")) {
+        break;
+      case INT:
         hashTableKeyType = HashTableKeyType.INT;
-      } else if (typeName.equals("bigint") || typeName.equals("long")) {
+        break;
+      case LONG:
         hashTableKeyType = HashTableKeyType.LONG;
-      } else if (VectorizationContext.isStringFamily(typeName)) {
+        break;
+      case STRING:
+      case CHAR:
+      case VARCHAR:
+      case BINARY:
        hashTableKeyType = HashTableKeyType.STRING;
+        break;
+      default:
+        // Stay with multi-key.
       }
     }
   }
@@ -2106,16 +2404,20 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) {
   switch (joinType) {
   case JoinDesc.INNER_JOIN:
     if (!isInnerBigOnly) {
+      operatorVariation = OperatorVariation.INNER;
       hashTableKind = HashTableKind.HASH_MAP;
     } else {
+      operatorVariation = OperatorVariation.INNER_BIG_ONLY;
       hashTableKind = HashTableKind.HASH_MULTISET;
     }
     break;
   case JoinDesc.LEFT_OUTER_JOIN:
   case JoinDesc.RIGHT_OUTER_JOIN:
+    operatorVariation = OperatorVariation.OUTER;
     hashTableKind = HashTableKind.HASH_MAP;
     break;
   case JoinDesc.LEFT_SEMI_JOIN:
+    operatorVariation = OperatorVariation.LEFT_SEMI;
     hashTableKind = HashTableKind.HASH_SET;
     break;
   default:
@@ -2130,86 +2432,84 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) {
   case SHORT:
   case INT:
   case LONG:
-    switch (joinType) {
-    case JoinDesc.INNER_JOIN:
-      if (!isInnerBigOnly) {
-        opClass = VectorMapJoinInnerLongOperator.class;
-      } else {
-        opClass = VectorMapJoinInnerBigOnlyLongOperator.class;
-      }
+    switch (operatorVariation) {
+    case INNER:
+      opClass = VectorMapJoinInnerLongOperator.class;
       break;
-    case JoinDesc.LEFT_OUTER_JOIN:
-    case JoinDesc.RIGHT_OUTER_JOIN:
-      opClass = VectorMapJoinOuterLongOperator.class;
+    case INNER_BIG_ONLY:
+      opClass = VectorMapJoinInnerBigOnlyLongOperator.class;
       break;
-    case JoinDesc.LEFT_SEMI_JOIN:
+    case LEFT_SEMI:
       opClass = VectorMapJoinLeftSemiLongOperator.class;
       break;
+    case OUTER:
+      opClass = VectorMapJoinOuterLongOperator.class;
+      break;
     default:
-      throw new HiveException("Unknown join type " + joinType);
+      throw new HiveException("Unknown operator variation " + operatorVariation);
     }
     break;
   case STRING:
-    switch (joinType) {
-    case JoinDesc.INNER_JOIN:
-      if (!isInnerBigOnly) {
-        opClass = VectorMapJoinInnerStringOperator.class;
-      } else {
-        opClass = VectorMapJoinInnerBigOnlyStringOperator.class;
-      }
+    switch (operatorVariation) {
+    case INNER:
+      opClass = VectorMapJoinInnerStringOperator.class;
       break;
-    case JoinDesc.LEFT_OUTER_JOIN:
-    case JoinDesc.RIGHT_OUTER_JOIN:
-      opClass = VectorMapJoinOuterStringOperator.class;
+    case INNER_BIG_ONLY:
+      opClass = VectorMapJoinInnerBigOnlyStringOperator.class;
       break;
-    case JoinDesc.LEFT_SEMI_JOIN:
+    case LEFT_SEMI:
       opClass = VectorMapJoinLeftSemiStringOperator.class;
       break;
+    case OUTER:
+      opClass = VectorMapJoinOuterStringOperator.class;
+      break;
     default:
-      throw new HiveException("Unknown join type " + joinType);
+      throw new HiveException("Unknown operator variation " + operatorVariation);
    }
     break;
   case MULTI_KEY:
-    switch (joinType) {
-    case JoinDesc.INNER_JOIN:
-      if (!isInnerBigOnly) {
-        opClass = VectorMapJoinInnerMultiKeyOperator.class;
-      } else {
-        opClass = VectorMapJoinInnerBigOnlyMultiKeyOperator.class;
-      }
+    switch (operatorVariation) {
+    case INNER:
+      opClass = VectorMapJoinInnerMultiKeyOperator.class;
       break;
-    case JoinDesc.LEFT_OUTER_JOIN:
-    case JoinDesc.RIGHT_OUTER_JOIN:
-      opClass = VectorMapJoinOuterMultiKeyOperator.class;
+    case INNER_BIG_ONLY:
+      opClass = VectorMapJoinInnerBigOnlyMultiKeyOperator.class;
      break;
-    case JoinDesc.LEFT_SEMI_JOIN:
+    case LEFT_SEMI:
       opClass = VectorMapJoinLeftSemiMultiKeyOperator.class;
       break;
+    case OUTER:
+      opClass = VectorMapJoinOuterMultiKeyOperator.class;
+      break;
     default:
-      throw new HiveException("Unknown join type " + joinType);
+      throw new HiveException("Unknown operator variation " + operatorVariation);
     }
     break;
+  default:
+    throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name());
   }

-  vectorOp = OperatorFactory.getVectorOperator(
-      opClass, op.getCompilationOpContext(), op.getConf(), vContext);
-  LOG.info("Vectorizer vectorizeOperator map join class " + vectorOp.getClass().getSimpleName());
-
   boolean minMaxEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED);

-  VectorMapJoinDesc vectorDesc = desc.getVectorDesc();
   vectorDesc.setHashTableImplementationType(hashTableImplementationType);
   vectorDesc.setHashTableKind(hashTableKind);
   vectorDesc.setHashTableKeyType(hashTableKeyType);
+  vectorDesc.setOperatorVariation(operatorVariation);
   vectorDesc.setMinMaxEnabled(minMaxEnabled);
+  vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo);
+
+  vectorOp = OperatorFactory.getVectorOperator(
+      opClass, op.getCompilationOpContext(), op.getConf(), vContext);
+  LOG.info("Vectorizer vectorizeOperator map join class " + vectorOp.getClass().getSimpleName());
+
   return vectorOp;
 }

- private boolean onExpressionHasNullSafes(MapJoinDesc desc) {
+ public static boolean onExpressionHasNullSafes(MapJoinDesc desc) {
   boolean[] nullSafes = desc.getNullSafes();
   if (nullSafes == null) {
-      return false;
+    return false;
   }
   for (boolean nullSafe : nullSafes) {
     if (nullSafe) {
@@ -2220,53 +2520,382 @@ private boolean onExpressionHasNullSafes(MapJoinDesc desc) {
 }

 private boolean canSpecializeMapJoin(Operator<? extends OperatorDesc> op, MapJoinDesc desc,
-    boolean isTez) {
+    boolean isTezOrSpark, VectorizationContext vContext, VectorMapJoinInfo vectorMapJoinInfo)
+    throws HiveException {
+
+  Preconditions.checkState(op instanceof MapJoinOperator);
+
+  // Allocate a VectorReduceSinkDesc initially with implementation type NONE so EXPLAIN
+  // can report this operator was vectorized, but not native. And, the conditions.
+  VectorMapJoinDesc vectorDesc = new VectorMapJoinDesc();
+  desc.setVectorDesc(vectorDesc);
+
+  boolean isVectorizationMapJoinNativeEnabled = HiveConf.getBoolVar(hiveConf,
+      HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED);
+
+  String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
+
+  boolean oneMapJoinCondition = (desc.getConds().length == 1);
+
+  boolean hasNullSafes = onExpressionHasNullSafes(desc);
+
+  byte posBigTable = (byte) desc.getPosBigTable();
+
+  // Since we want to display all the met and not met conditions in EXPLAIN, we determine all
+  // information first....
+
+  List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable);
+  VectorExpression[] allBigTableKeyExpressions = vContext.getVectorExpressions(keyDesc);
+  final int allBigTableKeyExpressionsLength = allBigTableKeyExpressions.length;
+
+  boolean supportsKeyTypes = true;  // Assume.
+  HashSet<String> notSupportedKeyTypes = new HashSet<String>();
+
+  // Since a key expression can be a calculation and the key will go into a scratch column,
+  // we need the mapping and type information.
+  int[] bigTableKeyColumnMap = new int[allBigTableKeyExpressionsLength];
+  String[] bigTableKeyColumnNames = new String[allBigTableKeyExpressionsLength];
+  TypeInfo[] bigTableKeyTypeInfos = new TypeInfo[allBigTableKeyExpressionsLength];
+  ArrayList<VectorExpression> bigTableKeyExpressionsList = new ArrayList<VectorExpression>();
+  VectorExpression[] bigTableKeyExpressions;
+  for (int i = 0; i < allBigTableKeyExpressionsLength; i++) {
+    VectorExpression ve = allBigTableKeyExpressions[i];
+    if (!IdentityExpression.isColumnOnly(ve)) {
+      bigTableKeyExpressionsList.add(ve);
+    }
+    bigTableKeyColumnMap[i] = ve.getOutputColumn();
+
+    ExprNodeDesc exprNode = keyDesc.get(i);
+    bigTableKeyColumnNames[i] = exprNode.toString();
+
+    TypeInfo typeInfo = exprNode.getTypeInfo();
+    // Verify we handle the key column types for an optimized table. This is effectively the
+    // same check used in HashTableLoader.
+    if (!MapJoinKey.isSupportedField(typeInfo)) {
+      supportsKeyTypes = false;
+      Category category = typeInfo.getCategory();
+      notSupportedKeyTypes.add(
+          (category != Category.PRIMITIVE ? category.toString() :
+            ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory().toString()));
+    }
+    bigTableKeyTypeInfos[i] = typeInfo;
+  }
+  if (bigTableKeyExpressionsList.size() == 0) {
+    bigTableKeyExpressions = null;
+  } else {
+    bigTableKeyExpressions = bigTableKeyExpressionsList.toArray(new VectorExpression[0]);
+  }

-  boolean specialize = false;
+  List<ExprNodeDesc> bigTableExprs = desc.getExprs().get(posBigTable);
+  VectorExpression[] allBigTableValueExpressions = vContext.getVectorExpressions(bigTableExprs);

-  if (op instanceof MapJoinOperator &&
+  boolean isFastHashTableEnabled =
       HiveConf.getBoolVar(hiveConf,
-          HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED)) {
+          HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED);

-    // Currently, only under Tez and non-N-way joins.
-    if (isTez && desc.getConds().length == 1 && !onExpressionHasNullSafes(desc)) {
+  // Especially since LLAP is prone to turn it off in the MapJoinDesc in later
+  // physical optimizer stages...
+  boolean isHybridHashJoin = desc.isHybridHashJoin();

-      // Ok, all basic restrictions satisfied so far...
-      specialize = true;
+  /*
+   * Populate vectorMapJoinInfo.
+   */

-      if (!HiveConf.getBoolVar(hiveConf,
-          HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED)) {
+  /*
+   * Similarly, we need a mapping since a value expression can be a calculation and the value
+   * will go into a scratch column.
+   */
+  int[] bigTableValueColumnMap = new int[allBigTableValueExpressions.length];
+  String[] bigTableValueColumnNames = new String[allBigTableValueExpressions.length];
+  TypeInfo[] bigTableValueTypeInfos = new TypeInfo[allBigTableValueExpressions.length];
+  ArrayList<VectorExpression> bigTableValueExpressionsList = new ArrayList<VectorExpression>();
+  VectorExpression[] bigTableValueExpressions;
+  for (int i = 0; i < bigTableValueColumnMap.length; i++) {
+    VectorExpression ve = allBigTableValueExpressions[i];
+    if (!IdentityExpression.isColumnOnly(ve)) {
+      bigTableValueExpressionsList.add(ve);
+    }
+    bigTableValueColumnMap[i] = ve.getOutputColumn();

-        // We are using the optimized hash table we have further
-        // restrictions (using optimized and key type).
+    ExprNodeDesc exprNode = bigTableExprs.get(i);
+    bigTableValueColumnNames[i] = exprNode.toString();
+    bigTableValueTypeInfos[i] = exprNode.getTypeInfo();
+  }
+  if (bigTableValueExpressionsList.size() == 0) {
+    bigTableValueExpressions = null;
+  } else {
+    bigTableValueExpressions = bigTableValueExpressionsList.toArray(new VectorExpression[0]);
+  }

-        if (!HiveConf.getBoolVar(hiveConf,
-            HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE)) {
-          specialize = false;
-        } else {
-          byte posBigTable = (byte) desc.getPosBigTable();
-          Map<Byte, List<ExprNodeDesc>> keyExprs = desc.getKeys();
-          List<ExprNodeDesc> bigTableKeyExprs = keyExprs.get(posBigTable);
-          for (ExprNodeDesc exprNodeDesc : bigTableKeyExprs) {
-            String typeName = exprNodeDesc.getTypeString();
-            if (!MapJoinKey.isSupportedField(typeName)) {
-              specialize = false;
-              break;
-            }
+  vectorMapJoinInfo.setBigTableKeyColumnMap(bigTableKeyColumnMap);
+  vectorMapJoinInfo.setBigTableKeyColumnNames(bigTableKeyColumnNames);
+  vectorMapJoinInfo.setBigTableKeyTypeInfos(bigTableKeyTypeInfos);
+  vectorMapJoinInfo.setBigTableKeyExpressions(bigTableKeyExpressions);
+
+  vectorMapJoinInfo.setBigTableValueColumnMap(bigTableValueColumnMap);
+  vectorMapJoinInfo.setBigTableValueColumnNames(bigTableValueColumnNames);
+  vectorMapJoinInfo.setBigTableValueTypeInfos(bigTableValueTypeInfos);
+  vectorMapJoinInfo.setBigTableValueExpressions(bigTableValueExpressions);
+
+  /*
+   * Small table information.
+   */
+  VectorColumnOutputMapping bigTableRetainedMapping =
+      new VectorColumnOutputMapping("Big Table Retained Mapping");
+
+  VectorColumnOutputMapping bigTableOuterKeyMapping =
+      new VectorColumnOutputMapping("Big Table Outer Key Mapping");
+
+  // The order of the fields in the LazyBinary small table value must be used, so
+  // we use the source ordering flavor for the mapping.
+  VectorColumnSourceMapping smallTableMapping =
+      new VectorColumnSourceMapping("Small Table Mapping");
+
+  Byte[] order = desc.getTagOrder();
+  Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]);
+  boolean isOuterJoin = !desc.getNoOuterJoin();
+
+  /*
+   * Gather up big and small table output result information from the MapJoinDesc.
+   */
+  List<Integer> bigTableRetainList = desc.getRetainList().get(posBigTable);
+  int bigTableRetainSize = bigTableRetainList.size();
+
+  int[] smallTableIndices;
+  int smallTableIndicesSize;
+  List<ExprNodeDesc> smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable);
+  if (desc.getValueIndices() != null && desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) {
+    smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable);
+    smallTableIndicesSize = smallTableIndices.length;
+  } else {
+    smallTableIndices = null;
+    smallTableIndicesSize = 0;
+  }
+
+  List<Integer> smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable);
+  int smallTableRetainSize = smallTableRetainList.size();
+
+  int smallTableResultSize = 0;
+  if (smallTableIndicesSize > 0) {
+    smallTableResultSize = smallTableIndicesSize;
+  } else if (smallTableRetainSize > 0) {
+    smallTableResultSize = smallTableRetainSize;
+  }
+
+  /*
+   * Determine the big table retained mapping first so we can optimize out (with
+   * projection) copying inner join big table keys in the subsequent small table results section.
+   */
+
+  // We use a mapping object here so we can build the projection in any order and
+  // get the ordered by 0 to n-1 output columns at the end.
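The "mapping object" idea described in the comment above can be shown in a small self-contained sketch: entries may be added in any order, two output positions may reference the same physical column (inner-join keys are projected, not copied), and sorting by output position yields the final dense projection. Names below are illustrative stand-ins for VectorColumnSourceMapping:

    import java.util.List;

    final class ProjectionMappingSketch {
      static final class Entry {
        final int outputColumn;
        final int physicalColumn;
        Entry(int outputColumn, int physicalColumn) {
          this.outputColumn = outputColumn;
          this.physicalColumn = physicalColumn;
        }
      }

      // Output positions are assumed dense 0..n-1; duplicates of the same
      // physical column are fine, which is what lets inner joins reuse a big
      // table key column instead of copying it.
      static int[] toProjection(List<Entry> entries) {
        int[] projection = new int[entries.size()];
        for (Entry e : entries) {
          projection[e.outputColumn] = e.physicalColumn;
        }
        return projection;
      }
    }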
+    VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping");
+
+    int nextOutputColumn = (order[0] == posBigTable ? 0 : smallTableResultSize);
+    for (int i = 0; i < bigTableRetainSize; i++) {
+
+      // Since bigTableValueExpressions may do a calculation and produce a scratch column, we
+      // need to map to the right batch column.
+
+      int retainColumn = bigTableRetainList.get(i);
+      int batchColumnIndex = bigTableValueColumnMap[retainColumn];
+      TypeInfo typeInfo = bigTableValueTypeInfos[i];
+
+      // With this map we project the big table batch to make it look like an output batch.
+      projectionMapping.add(nextOutputColumn, batchColumnIndex, typeInfo);
+
+      // Collect columns we copy from the big table batch to the overflow batch.
+      if (!bigTableRetainedMapping.containsOutputColumn(batchColumnIndex)) {
+        // Tolerate repeated use of a big table column.
+        bigTableRetainedMapping.add(batchColumnIndex, batchColumnIndex, typeInfo);
+      }
+
+      nextOutputColumn++;
+    }
+
+    /*
+     * Now determine the small table results.
+     */
+    boolean smallTableExprVectorizes = true;
+
+    int firstSmallTableOutputColumn;
+    firstSmallTableOutputColumn = (order[0] == posBigTable ? bigTableRetainSize : 0);
+    int smallTableOutputCount = 0;
+    nextOutputColumn = firstSmallTableOutputColumn;
+
+    // Small table indices have more information (i.e. keys) than retain, so use them if they exist...
+    String[] bigTableRetainedNames;
+    if (smallTableIndicesSize > 0) {
+      smallTableOutputCount = smallTableIndicesSize;
+      bigTableRetainedNames = new String[smallTableOutputCount];
+
+      for (int i = 0; i < smallTableIndicesSize; i++) {
+        if (smallTableIndices[i] >= 0) {
+
+          // Zero and above numbers indicate a big table key is needed for
+          // small table result "area".
+
+          int keyIndex = smallTableIndices[i];
+
+          // Since bigTableKeyExpressions may do a calculation and produce a scratch column, we
+          // need to map to the right column.
+          int batchKeyColumn = bigTableKeyColumnMap[keyIndex];
+          bigTableRetainedNames[i] = bigTableKeyColumnNames[keyIndex];
+          TypeInfo typeInfo = bigTableKeyTypeInfos[keyIndex];
+
+          if (!isOuterJoin) {
+
+            // Optimize inner join keys of small table results.
+
+            // Project the big table key into the small table result "area".
+            projectionMapping.add(nextOutputColumn, batchKeyColumn, typeInfo);
+
+            if (!bigTableRetainedMapping.containsOutputColumn(batchKeyColumn)) {
+              // If necessary, copy the big table key into the overflow batch's small table
+              // result "area".
+              bigTableRetainedMapping.add(batchKeyColumn, batchKeyColumn, typeInfo);
            }
+          } else {
+
+            // For outer joins, since the small table key can be null when there is no match,
+            // we must have a physical (scratch) column for those keys.  We cannot use the
+            // projection optimization used by inner joins above.
+
+            int scratchColumn = vContext.allocateScratchColumn(typeInfo);
+            projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
+
+            bigTableRetainedMapping.add(batchKeyColumn, scratchColumn, typeInfo);
+
+            bigTableOuterKeyMapping.add(batchKeyColumn, scratchColumn, typeInfo);
          }
        } else {
-          // With the fast hash table implementation, we currently do not support
-          // Hybrid Grace Hash Join.
+          // Negative numbers indicate a column to be (deserialized) read from the small table's
+          // LazyBinary value row.
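+          // Hypothetical example: smallTableIndices of {0, -1, -2} selects big table key 0
+          // followed by small table values 0 and 1, since -smallTableIndices[i] - 1 below
+          // recovers the value position.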
+          int smallTableValueIndex = -smallTableIndices[i] - 1;
-          if (desc.isHybridHashJoin()) {
-            specialize = false;
+          ExprNodeDesc smallTableExprNode = smallTableExprs.get(i);
+          if (!validateExprNodeDesc(smallTableExprNode, "Small Table")) {
+            clearNotVectorizedReason();
+            smallTableExprVectorizes = false;
          }
+
+          bigTableRetainedNames[i] = smallTableExprNode.toString();
+
+          TypeInfo typeInfo = smallTableExprNode.getTypeInfo();
+
+          // Make a new big table scratch column for the small table value.
+          int scratchColumn = vContext.allocateScratchColumn(typeInfo);
+          projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
+
+          smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo);
        }
+        nextOutputColumn++;
+      }
+    } else if (smallTableRetainSize > 0) {
+      smallTableOutputCount = smallTableRetainSize;
+      bigTableRetainedNames = new String[smallTableOutputCount];
+
+      // Only small table values appear in join output result.
+
+      for (int i = 0; i < smallTableRetainSize; i++) {
+        int smallTableValueIndex = smallTableRetainList.get(i);
+
+        ExprNodeDesc smallTableExprNode = smallTableExprs.get(i);
+        if (!validateExprNodeDesc(smallTableExprNode, "Small Table")) {
+          clearNotVectorizedReason();
+          smallTableExprVectorizes = false;
+        }
+
+        bigTableRetainedNames[i] = smallTableExprNode.toString();
+
+        // Make a new big table scratch column for the small table value.
+        TypeInfo typeInfo = smallTableExprNode.getTypeInfo();
+        int scratchColumn = vContext.allocateScratchColumn(typeInfo);
+
+        projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
+
+        smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo);
+        nextOutputColumn++;
      }
+    } else {
+      bigTableRetainedNames = new String[0];
    }
-    return specialize;
+
+    boolean useOptimizedTable =
+        HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
+
+    // Remember the condition variables for EXPLAIN regardless of whether we specialize or not.
+    vectorDesc.setUseOptimizedTable(useOptimizedTable);
+    vectorDesc.setIsVectorizationMapJoinNativeEnabled(isVectorizationMapJoinNativeEnabled);
+    vectorDesc.setEngine(engine);
+    vectorDesc.setOneMapJoinCondition(oneMapJoinCondition);
+    vectorDesc.setHasNullSafes(hasNullSafes);
+    vectorDesc.setSmallTableExprVectorizes(smallTableExprVectorizes);
+
+    vectorDesc.setIsFastHashTableEnabled(isFastHashTableEnabled);
+    vectorDesc.setIsHybridHashJoin(isHybridHashJoin);
+
+    vectorDesc.setSupportsKeyTypes(supportsKeyTypes);
+    if (!supportsKeyTypes) {
+      vectorDesc.setNotSupportedKeyTypes(new ArrayList<String>(notSupportedKeyTypes));
+    }
+
+    // Check common conditions for both Optimized and Fast Hash Tables.
+    boolean result = true;    // Assume.
+    if (!useOptimizedTable ||
+        !isVectorizationMapJoinNativeEnabled ||
+        !isTezOrSpark ||
+        !oneMapJoinCondition ||
+        hasNullSafes ||
+        !smallTableExprVectorizes) {
+      result = false;
+    }
+
+    // supportsKeyTypes
+
+    if (!isFastHashTableEnabled) {
+
+      // Check optimized-only hash table restrictions.
+      if (!supportsKeyTypes) {
+        result = false;
+      }
+
+    } else {
+
+      // With the fast hash table implementation, we currently do not support
+      // Hybrid Grace Hash Join.
+
+      if (isHybridHashJoin) {
+        result = false;
+      }
+    }
+
+    // Convert dynamic arrays and maps to simple arrays.
+
+    bigTableRetainedMapping.finalize();
+
+    bigTableOuterKeyMapping.finalize();
+
+    smallTableMapping.finalize();
+
+    vectorMapJoinInfo.setBigTableRetainedMapping(bigTableRetainedMapping);
+    vectorMapJoinInfo.setBigTableOuterKeyMapping(bigTableOuterKeyMapping);
+    vectorMapJoinInfo.setSmallTableMapping(smallTableMapping);
+
+    projectionMapping.finalize();
+
+    // Verify we added an entry for each output.
+    assert projectionMapping.isSourceSequenceGood();
+
+    vectorMapJoinInfo.setProjectionMapping(projectionMapping);
+
+    return result;
  }
 
   private Operator<? extends OperatorDesc> specializeReduceSinkOperator(
@@ -2326,8 +2955,8 @@ private boolean canSpecializeMapJoin(Operator<? extends OperatorDesc> op, MapJoi
       throw new HiveException("Unknown reduce sink key type " + reduceSinkKeyType);
     }
 
-    VectorReduceSinkDesc vectorDesc = new VectorReduceSinkDesc();
-    desc.setVectorDesc(vectorDesc);
+    VectorReduceSinkDesc vectorDesc = (VectorReduceSinkDesc) desc.getVectorDesc();
+
     vectorDesc.setReduceSinkKeyType(reduceSinkKeyType);
     vectorDesc.setVectorReduceSinkInfo(vectorReduceSinkInfo);
 
@@ -2339,51 +2968,60 @@ private boolean canSpecializeMapJoin(Operator<? extends OperatorDesc> op, MapJoi
   }
 
   private boolean canSpecializeReduceSink(ReduceSinkDesc desc,
-      boolean isTez, VectorizationContext vContext,
+      boolean isTezOrSpark, VectorizationContext vContext,
       VectorReduceSinkInfo vectorReduceSinkInfo)
           throws HiveException {
 
-    if (!HiveConf.getBoolVar(hiveConf,
-        HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED)) {
-      return false;
-    }
+    // Allocate a VectorReduceSinkDesc initially with key type NONE so EXPLAIN can report this
+    // operator was vectorized, but not native.  And, the conditions.
+    VectorReduceSinkDesc vectorDesc = new VectorReduceSinkDesc();
+    desc.setVectorDesc(vectorDesc);
 
-    // Many restrictions.
+    boolean isVectorizationReduceSinkNativeEnabled = HiveConf.getBoolVar(hiveConf,
+        HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED);
 
-    if (!isTez) {
-      return false;
-    }
+    String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
 
-    if (desc.getWriteType() == AcidUtils.Operation.UPDATE ||
-        desc.getWriteType() == AcidUtils.Operation.DELETE) {
-      return false;
-    }
+    boolean acidChange =
+        desc.getWriteType() == AcidUtils.Operation.UPDATE ||
+        desc.getWriteType() == AcidUtils.Operation.DELETE;
 
-    if (desc.getBucketCols() != null && !desc.getBucketCols().isEmpty()) {
-      return false;
-    }
+    boolean hasBuckets = desc.getBucketCols() != null && !desc.getBucketCols().isEmpty();
 
-    boolean useUniformHash = desc.getReducerTraits().contains(UNIFORM);
-    if (!useUniformHash) {
-      return false;
-    }
+    boolean hasTopN = desc.getTopN() >= 0;
 
-    if (desc.getTopN() >= 0) {
-      return false;
-    }
+    boolean useUniformHash = desc.getReducerTraits().contains(UNIFORM);
 
-    if (desc.getDistinctColumnIndices().size() > 0) {
-      return false;
-    }
+    boolean hasDistinctColumns = desc.getDistinctColumnIndices().size() > 0;
 
     TableDesc keyTableDesc = desc.getKeySerializeInfo();
     Class<? extends Deserializer> keySerializerClass = keyTableDesc.getDeserializerClass();
-    if (keySerializerClass != org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe.class) {
-      return false;
-    }
+    boolean isKeyBinarySortable = (keySerializerClass == org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe.class);
 
     TableDesc valueTableDesc = desc.getValueSerializeInfo();
     Class<? extends Deserializer> valueDeserializerClass = valueTableDesc.getDeserializerClass();
-    if (valueDeserializerClass != org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class) {
+    boolean isValueLazyBinary = (valueDeserializerClass ==
+        org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class);
+
+    // Remember the condition variables for EXPLAIN regardless.
+    vectorDesc.setIsVectorizationReduceSinkNativeEnabled(isVectorizationReduceSinkNativeEnabled);
+    vectorDesc.setEngine(engine);
+    vectorDesc.setAcidChange(acidChange);
+    vectorDesc.setHasBuckets(hasBuckets);
+    vectorDesc.setHasTopN(hasTopN);
+    vectorDesc.setUseUniformHash(useUniformHash);
+    vectorDesc.setHasDistinctColumns(hasDistinctColumns);
+    vectorDesc.setIsKeyBinarySortable(isKeyBinarySortable);
+    vectorDesc.setIsValueLazyBinary(isValueLazyBinary);
+
+    // Many restrictions.
+    if (!isVectorizationReduceSinkNativeEnabled ||
+        !isTezOrSpark ||
+        acidChange ||
+        hasBuckets ||
+        hasTopN ||
+        !useUniformHash ||
+        hasDistinctColumns ||
+        !isKeyBinarySortable ||
+        !isValueLazyBinary) {
       return false;
     }
 
@@ -2453,21 +3091,136 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc,
     return true;
   }
 
-  Operator<? extends OperatorDesc> vectorizeOperator(Operator<? extends OperatorDesc> op,
-      VectorizationContext vContext, boolean isTez) throws HiveException {
+  private boolean usesVectorUDFAdaptor(VectorExpression vecExpr) {
+    if (vecExpr == null) {
+      return false;
+    }
+    if (vecExpr instanceof VectorUDFAdaptor) {
+      return true;
+    }
+    if (usesVectorUDFAdaptor(vecExpr.getChildExpressions())) {
+      return true;
+    }
+    return false;
+  }
+
+  private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) {
+    if (vecExprs == null) {
+      return false;
+    }
+    for (VectorExpression vecExpr : vecExprs) {
+      if (usesVectorUDFAdaptor(vecExpr)) {
+        return true;
+      }
+    }
+    return false;
+  }
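+
+  // Note: an expression with no native vectorized implementation is wrapped in a
+  // VectorUDFAdaptor by the VectorizationContext; the recursion above finds such adaptors
+  // anywhere in an expression tree, including inside child expressions.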
+
+  public static Operator<? extends OperatorDesc> vectorizeTableScanOperator(
+      Operator<? extends OperatorDesc> tableScanOp, VectorizationContext vContext)
+          throws HiveException {
+    TableScanDesc tableScanDesc = (TableScanDesc) tableScanOp.getConf();
+    VectorTableScanDesc vectorTableScanDesc = new VectorTableScanDesc();
+    tableScanDesc.setVectorDesc(vectorTableScanDesc);
+    vectorTableScanDesc.setProjectedOutputColumns(
+        ArrayUtils.toPrimitive(vContext.getProjectedColumns().toArray(new Integer[0])));
+    return tableScanOp;
+  }
+
+  public static Operator<? extends OperatorDesc> vectorizeFilterOperator(
+      Operator<? extends OperatorDesc> filterOp, VectorizationContext vContext)
+          throws HiveException {
+    FilterDesc filterDesc = (FilterDesc) filterOp.getConf();
+    VectorFilterDesc vectorFilterDesc = new VectorFilterDesc();
+    filterDesc.setVectorDesc(vectorFilterDesc);
+    ExprNodeDesc predicateExpr = filterDesc.getPredicate();
+    VectorExpression vectorPredicateExpr =
+        vContext.getVectorExpression(predicateExpr, VectorExpressionDescriptor.Mode.FILTER);
+    vectorFilterDesc.setPredicateExpression(vectorPredicateExpr);
+    return OperatorFactory.getVectorOperator(
+        filterOp.getCompilationOpContext(), filterDesc, vContext);
+  }
+
+  /*
+   * NOTE: The VectorGroupByDesc has already been allocated and partially populated.
+   */
+  public static Operator<? extends OperatorDesc> vectorizeGroupByOperator(
+      Operator<? extends OperatorDesc> groupByOp, VectorizationContext vContext)
+          throws HiveException {
+    GroupByDesc groupByDesc = (GroupByDesc) groupByOp.getConf();
+    List<ExprNodeDesc> keysDesc = groupByDesc.getKeys();
+    VectorExpression[] vecKeyExpressions = vContext.getVectorExpressions(keysDesc);
+    ArrayList<AggregationDesc> aggrDesc = groupByDesc.getAggregators();
+    final int size = aggrDesc.size();
+    VectorAggregateExpression[] vecAggregators = new VectorAggregateExpression[size];
+    int[] projectedOutputColumns = new int[size];
+    for (int i = 0; i < size; ++i) {
+      AggregationDesc aggDesc = aggrDesc.get(i);
+      vecAggregators[i] = vContext.getAggregatorExpression(aggDesc);
+
+      // GroupBy generates a new vectorized row batch...
+      projectedOutputColumns[i] = i;
+    }
+    VectorGroupByDesc vectorGroupByDesc = (VectorGroupByDesc) groupByDesc.getVectorDesc();
+    vectorGroupByDesc.setKeyExpressions(vecKeyExpressions);
+    vectorGroupByDesc.setAggregators(vecAggregators);
+    vectorGroupByDesc.setProjectedOutputColumns(projectedOutputColumns);
+    return OperatorFactory.getVectorOperator(
+        groupByOp.getCompilationOpContext(), groupByDesc, vContext);
+  }
+
+  public static Operator<? extends OperatorDesc> vectorizeSelectOperator(
+      Operator<? extends OperatorDesc> selectOp, VectorizationContext vContext)
+          throws HiveException {
+    SelectDesc selectDesc = (SelectDesc) selectOp.getConf();
+    VectorSelectDesc vectorSelectDesc = new VectorSelectDesc();
+    selectDesc.setVectorDesc(vectorSelectDesc);
+    List<ExprNodeDesc> colList = selectDesc.getColList();
+    int index = 0;
+    final int size = colList.size();
+    VectorExpression[] vectorSelectExprs = new VectorExpression[size];
+    int[] projectedOutputColumns = new int[size];
+    for (int i = 0; i < size; i++) {
+      ExprNodeDesc expr = colList.get(i);
+      VectorExpression ve = vContext.getVectorExpression(expr);
+      projectedOutputColumns[i] = ve.getOutputColumn();
+      if (ve instanceof IdentityExpression) {
+        // Suppress useless evaluation.
+        continue;
+      }
+      vectorSelectExprs[index++] = ve;
+    }
+    if (index < size) {
+      vectorSelectExprs = Arrays.copyOf(vectorSelectExprs, index);
+    }
+    vectorSelectDesc.setSelectExpressions(vectorSelectExprs);
+    vectorSelectDesc.setProjectedOutputColumns(projectedOutputColumns);
+    return OperatorFactory.getVectorOperator(
+        selectOp.getCompilationOpContext(), selectDesc, vContext);
+  }
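+
+  // Hypothetical example: for SELECT a, a + b the expression for a is an IdentityExpression
+  // and is suppressed, while a + b evaluates into a scratch column; projectedOutputColumns
+  // references both batch columns but only the a + b expression is kept in vectorSelectExprs.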
+
+  public Operator<? extends OperatorDesc> vectorizeOperator(Operator<? extends OperatorDesc> op,
+      VectorizationContext vContext, boolean isTezOrSpark, VectorTaskColumnInfo vectorTaskColumnInfo)
+          throws HiveException {
     Operator<? extends OperatorDesc> vectorOp = null;
+    boolean isNative;
     switch (op.getType()) {
+      case TABLESCAN:
+        vectorOp = vectorizeTableScanOperator(op, vContext);
+        isNative = true;
+        break;
       case MAPJOIN:
         {
-          MapJoinDesc desc = (MapJoinDesc) op.getConf();
-          boolean specialize = canSpecializeMapJoin(op, desc, isTez || isSpark);
-
-          if (!specialize) {
-
-            Class<? extends Operator<?>> opClass = null;
-            if (op instanceof MapJoinOperator) {
-
+          if (op instanceof MapJoinOperator) {
+            VectorMapJoinInfo vectorMapJoinInfo = new VectorMapJoinInfo();
+            MapJoinDesc desc = (MapJoinDesc) op.getConf();
+            boolean specialize = canSpecializeMapJoin(op, desc, isTezOrSpark, vContext, vectorMapJoinInfo);
+
+            if (!specialize) {
+
+              Class<? extends Operator<?>> opClass = null;
+
               // *NON-NATIVE* vector map differences for LEFT OUTER JOIN and Filtered...
 
               List<ExprNodeDesc> bigTableFilters = desc.getFilters().get((byte) desc.getPosBigTable());
@@ -2477,20 +3230,36 @@
               } else {
                 opClass = VectorMapJoinOuterFilteredOperator.class;
               }
-          } else if (op instanceof SMBMapJoinOperator) {
-            opClass = VectorSMBMapJoinOperator.class;
+
+              vectorOp = OperatorFactory.getVectorOperator(
+                  opClass, op.getCompilationOpContext(), op.getConf(), vContext);
+              isNative = false;
+            } else {
+
+              // TEMPORARY Until Native Vector Map Join with Hybrid passes tests...
+              // HiveConf.setBoolVar(physicalContext.getConf(),
+              //     HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, false);
+
+              vectorOp = specializeMapJoinOperator(op, vContext, desc, vectorMapJoinInfo);
+              isNative = true;
+
+              if (vectorTaskColumnInfo != null) {
+                if (usesVectorUDFAdaptor(vectorMapJoinInfo.getBigTableKeyExpressions())) {
+                  vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
+                }
+                if (usesVectorUDFAdaptor(vectorMapJoinInfo.getBigTableValueExpressions())) {
+                  vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
+                }
+              }
+            }
-
-          vectorOp = OperatorFactory.getVectorOperator(
-              opClass, op.getCompilationOpContext(), op.getConf(), vContext);
-        } else {
-
-          // TEMPORARY Until Native Vector Map Join with Hybrid passes tests...
-          // HiveConf.setBoolVar(physicalContext.getConf(),
-          //     HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, false);
-
-          vectorOp = specializeMapJoinOperator(op, vContext, desc);
+          } else {
+            Preconditions.checkState(op instanceof SMBMapJoinOperator);
+            SMBJoinDesc smbJoinSinkDesc = (SMBJoinDesc) op.getConf();
+            VectorSMBJoinDesc vectorSMBJoinDesc = new VectorSMBJoinDesc();
+            smbJoinSinkDesc.setVectorDesc(vectorSMBJoinDesc);
+            vectorOp = OperatorFactory.getVectorOperator(
+                op.getCompilationOpContext(), smbJoinSinkDesc, vContext);
+            isNative = false;
          }
        }
        break;
@@ -2499,39 +3268,144 @@
         {
           VectorReduceSinkInfo vectorReduceSinkInfo = new VectorReduceSinkInfo();
           ReduceSinkDesc desc = (ReduceSinkDesc) op.getConf();
-          boolean specialize = canSpecializeReduceSink(desc, isTez, vContext, vectorReduceSinkInfo);
+          boolean specialize = canSpecializeReduceSink(desc, isTezOrSpark, vContext, vectorReduceSinkInfo);
 
           if (!specialize) {
 
             vectorOp = OperatorFactory.getVectorOperator(
                 op.getCompilationOpContext(), op.getConf(), vContext);
-
+            isNative = false;
           } else {
 
             vectorOp = specializeReduceSinkOperator(op, vContext, desc, vectorReduceSinkInfo);
+            isNative = true;
+            if (vectorTaskColumnInfo != null) {
+              if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkKeyExpressions())) {
+                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
+              }
+              if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkValueExpressions())) {
+                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
+              }
+            }
          }
        }
        break;
-      case GROUPBY:
       case FILTER:
+        {
+          vectorOp = vectorizeFilterOperator(op, vContext);
+          isNative = true;
+          if (vectorTaskColumnInfo != null) {
+            VectorFilterDesc vectorFilterDesc =
+                (VectorFilterDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
+            VectorExpression vectorPredicateExpr = vectorFilterDesc.getPredicateExpression();
+            if (usesVectorUDFAdaptor(vectorPredicateExpr)) {
+              vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
+            }
+          }
+        }
+        break;
       case SELECT:
+        {
+          vectorOp = vectorizeSelectOperator(op, vContext);
+          isNative = true;
+          if (vectorTaskColumnInfo != null) {
+            VectorSelectDesc vectorSelectDesc =
+                (VectorSelectDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
+            VectorExpression[] vectorSelectExprs = vectorSelectDesc.getSelectExpressions();
+            if (usesVectorUDFAdaptor(vectorSelectExprs)) {
+              vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
+            }
+          }
+        }
+        break;
+      case GROUPBY:
+        {
+          vectorOp = vectorizeGroupByOperator(op, vContext);
+          isNative = false;
+          if (vectorTaskColumnInfo != null) {
+            VectorGroupByDesc vectorGroupByDesc =
+                (VectorGroupByDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
+            if (!vectorGroupByDesc.isVectorOutput()) {
+              vectorTaskColumnInfo.setGroupByVectorOutput(false);
+            }
+            VectorExpression[] vecKeyExpressions = vectorGroupByDesc.getKeyExpressions();
+            if (usesVectorUDFAdaptor(vecKeyExpressions)) {
+              vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
+            }
+            VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators();
+            for (VectorAggregateExpression vecAggr : vecAggregators) {
+              if (usesVectorUDFAdaptor(vecAggr.inputExpression())) {
+                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
+              }
+            }
+          }
+
+        }
+        break;
       case FILESINK:
+        {
+          FileSinkDesc fileSinkDesc = (FileSinkDesc) op.getConf();
+          VectorFileSinkDesc vectorFileSinkDesc = new VectorFileSinkDesc();
+          fileSinkDesc.setVectorDesc(vectorFileSinkDesc);
+          vectorOp = OperatorFactory.getVectorOperator(
+              op.getCompilationOpContext(), fileSinkDesc, vContext);
+          isNative = false;
+        }
+        break;
       case LIMIT:
-      case EXTRACT:
+        {
+          LimitDesc limitDesc = (LimitDesc) op.getConf();
+          VectorLimitDesc vectorLimitDesc = new VectorLimitDesc();
+          limitDesc.setVectorDesc(vectorLimitDesc);
+          vectorOp = OperatorFactory.getVectorOperator(
+              op.getCompilationOpContext(), limitDesc, vContext);
+          isNative = true;
+        }
+        break;
       case EVENT:
+        {
+          AppMasterEventDesc eventDesc = (AppMasterEventDesc) op.getConf();
+          VectorAppMasterEventDesc vectorEventDesc = new VectorAppMasterEventDesc();
+          eventDesc.setVectorDesc(vectorEventDesc);
+          vectorOp = OperatorFactory.getVectorOperator(
+              op.getCompilationOpContext(), eventDesc, vContext);
+          isNative = true;
+        }
+        break;
       case HASHTABLESINK:
+        {
+          SparkHashTableSinkDesc sparkHashTableSinkDesc = (SparkHashTableSinkDesc) op.getConf();
+          VectorSparkHashTableSinkDesc vectorSparkHashTableSinkDesc = new VectorSparkHashTableSinkDesc();
+          sparkHashTableSinkDesc.setVectorDesc(vectorSparkHashTableSinkDesc);
+          vectorOp = OperatorFactory.getVectorOperator(
+              op.getCompilationOpContext(), sparkHashTableSinkDesc, vContext);
+          isNative = true;
+        }
+        break;
       case SPARKPRUNINGSINK:
-        vectorOp = OperatorFactory.getVectorOperator(
-            op.getCompilationOpContext(), op.getConf(), vContext);
+        {
+          SparkPartitionPruningSinkDesc sparkPartitionPruningSinkDesc = (SparkPartitionPruningSinkDesc) op.getConf();
+          VectorSparkPartitionPruningSinkDesc vectorSparkPartitionPruningSinkDesc = new VectorSparkPartitionPruningSinkDesc();
+          sparkPartitionPruningSinkDesc.setVectorDesc(vectorSparkPartitionPruningSinkDesc);
+          vectorOp = OperatorFactory.getVectorOperator(
+              op.getCompilationOpContext(), sparkPartitionPruningSinkDesc, vContext);
+          isNative = true;
+        }
        break;
       default:
+        // These are children of GROUP BY operators with non-vector outputs.
+        isNative = false;
        vectorOp = op;
        break;
     }
+    Preconditions.checkState(vectorOp != null);
+    if (vectorTaskColumnInfo != null && !isNative) {
+      vectorTaskColumnInfo.setAllNative(false);
+    }
 
-    LOG.debug("vectorizeOperator " + (vectorOp == null ? "NULL" : vectorOp.getClass().getName()));
-    LOG.debug("vectorizeOperator " + (vectorOp == null || vectorOp.getConf() == null ?
        "NULL" : vectorOp.getConf().getClass().getName()));
+    LOG.debug("vectorizeOperator " + vectorOp.getClass().getName());
+    LOG.debug("vectorizeOperator " + vectorOp.getConf().getClass().getName());
 
     if (vectorOp != op) {
       fixupParentChildOperators(op, vectorOp);
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
new file mode 100644
index 0000000..e0a6198
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.physical;
+
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+/**
+ * Why a node did not vectorize.
+ *
+ */
+public class VectorizerReason {
+
+  private static final long serialVersionUID = 1L;
+
+  public static enum VectorizerNodeIssue {
+    NONE,
+    NODE_ISSUE,
+    OPERATOR_ISSUE,
+    EXPRESSION_ISSUE
+  }
+
+  private final VectorizerNodeIssue vectorizerNodeIssue;
+
+  private final Operator<? extends OperatorDesc> operator;
+
+  private final String expressionTitle;
+
+  private final String issue;
+
+  private VectorizerReason(VectorizerNodeIssue vectorizerNodeIssue,
+      Operator<? extends OperatorDesc> operator, String expressionTitle, String issue) {
+    this.vectorizerNodeIssue = vectorizerNodeIssue;
+    this.operator = operator;
+    this.expressionTitle = expressionTitle;
+    this.issue = issue;
+  }
+
+  public static VectorizerReason createNodeIssue(String issue) {
+    return new VectorizerReason(
+        VectorizerNodeIssue.NODE_ISSUE,
+        null,
+        null,
+        issue);
+  }
+
+  public static VectorizerReason createOperatorIssue(Operator<? extends OperatorDesc> operator,
+      String issue) {
+    return new VectorizerReason(
+        VectorizerNodeIssue.OPERATOR_ISSUE,
+        operator,
+        null,
+        issue);
+  }
+
+  public static VectorizerReason createExpressionIssue(Operator<? extends OperatorDesc> operator,
+      String expressionTitle, String issue) {
+    return new VectorizerReason(
+        VectorizerNodeIssue.EXPRESSION_ISSUE,
+        operator,
+        expressionTitle,
+        issue);
+  }
+
+  @Override
+  public VectorizerReason clone() {
+    return new VectorizerReason(vectorizerNodeIssue, operator, expressionTitle, issue);
+  }
+
+  public VectorizerNodeIssue getVectorizerNodeIssue() {
+    return vectorizerNodeIssue;
+  }
+
+  public Operator<? extends OperatorDesc> getOperator() {
+    return operator;
+  }
+
+  public String getExpressionTitle() {
+    return expressionTitle;
+  }
+
+  public String getIssue() {
+    return issue;
+  }
"unknown" : issue); + break; + case EXPRESSION_ISSUE: + reason = expressionTitle + " expression for " + + (operator == null ? "Unknown" : operator.getType()) + " operator: " + + (issue == null ? "unknown" : issue); + break; + default: + reason = "Unknown " + vectorizerNodeIssue; + } + return reason; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java index 4a8ff15..1f118dc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java @@ -27,12 +27,27 @@ */ public class ExplainConfiguration { + + public enum VectorizationDetailLevel { + + SUMMARY(4), OPERATOR(3), EXPRESSION(2), DETAIL(1); + + public final int rank; + VectorizationDetailLevel(int rank) { + this.rank = rank; + } + }; + private boolean extended = false; private boolean formatted = false; private boolean dependency = false; private boolean logical = false; private boolean authorize = false; private boolean userLevelExplain = false; + private boolean vectorization = false; + private boolean vectorizationOnly = false; + private VectorizationDetailLevel vectorizationDetailLevel = VectorizationDetailLevel.SUMMARY; + private Path explainRootPath; private Map opIdToRuntimeNumRows; @@ -98,6 +113,30 @@ public void setUserLevelExplain(boolean userLevelExplain) { this.userLevelExplain = userLevelExplain; } + public boolean isVectorization() { + return vectorization; + } + + public void setVectorization(boolean vectorization) { + this.vectorization = vectorization; + } + + public boolean isVectorizationOnly() { + return vectorizationOnly; + } + + public void setVectorizationOnly(boolean vectorizationOnly) { + this.vectorizationOnly = vectorizationOnly; + } + + public VectorizationDetailLevel getVectorizationDetailLevel() { + return vectorizationDetailLevel; + } + + public void setVectorizationDetailLevel(VectorizationDetailLevel vectorizationDetailLevel) { + this.vectorizationDetailLevel = vectorizationDetailLevel; + } + public Path getExplainRootPath() { return explainRootPath; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java index 300542e..f62cf9a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState; +import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.VectorizationDetailLevel; import org.apache.hadoop.hive.ql.plan.ExplainWork; import org.apache.hadoop.hive.ql.processors.CommandProcessor; import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory; @@ -70,7 +71,9 @@ public ExplainSemanticAnalyzer(QueryState queryState) throws SemanticException { @SuppressWarnings("unchecked") @Override public void analyzeInternal(ASTNode ast) throws SemanticException { - for (int i = 1; i < ast.getChildCount(); i++) { + final int childCount = ast.getChildCount(); + int i = 1; // Skip TOK_QUERY. 
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
index 300542e..f62cf9a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
@@ -45,6 +45,7 @@
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState;
+import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.VectorizationDetailLevel;
 import org.apache.hadoop.hive.ql.plan.ExplainWork;
 import org.apache.hadoop.hive.ql.processors.CommandProcessor;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory;
@@ -70,7 +71,9 @@ public ExplainSemanticAnalyzer(QueryState queryState) throws SemanticException {
   @SuppressWarnings("unchecked")
   @Override
   public void analyzeInternal(ASTNode ast) throws SemanticException {
-    for (int i = 1; i < ast.getChildCount(); i++) {
+    final int childCount = ast.getChildCount();
+    int i = 1;   // Skip TOK_QUERY.
+    while (i < childCount) {
       int explainOptions = ast.getChild(i).getType();
       if (explainOptions == HiveParser.KW_FORMATTED) {
         config.setFormatted(true);
@@ -85,7 +88,40 @@ public void analyzeInternal(ASTNode ast) throws SemanticException {
       } else if (explainOptions == HiveParser.KW_ANALYZE) {
         config.setAnalyze(AnalyzeState.RUNNING);
         config.setExplainRootPath(ctx.getMRTmpPath());
+      } else if (explainOptions == HiveParser.KW_VECTORIZATION) {
+        config.setVectorization(true);
+        if (i + 1 < childCount) {
+          int vectorizationOption = ast.getChild(i + 1).getType();
+
+          // [ONLY]
+          if (vectorizationOption == HiveParser.TOK_ONLY) {
+            config.setVectorizationOnly(true);
+            i++;
+            if (i + 1 >= childCount) {
+              break;
+            }
+            vectorizationOption = ast.getChild(i + 1).getType();
+          }
+
+          // [SUMMARY|OPERATOR|EXPRESSION|DETAIL]
+          if (vectorizationOption == HiveParser.TOK_SUMMARY) {
+            config.setVectorizationDetailLevel(VectorizationDetailLevel.SUMMARY);
+            i++;
+          } else if (vectorizationOption == HiveParser.TOK_OPERATOR) {
+            config.setVectorizationDetailLevel(VectorizationDetailLevel.OPERATOR);
+            i++;
+          } else if (vectorizationOption == HiveParser.TOK_EXPRESSION) {
+            config.setVectorizationDetailLevel(VectorizationDetailLevel.EXPRESSION);
+            i++;
+          } else if (vectorizationOption == HiveParser.TOK_DETAIL) {
+            config.setVectorizationDetailLevel(VectorizationDetailLevel.DETAIL);
+            i++;
+          }
+        }
+      } else {
+        // UNDONE: UNKNOWN OPTION?
       }
+      i++;
     }
 
     ctx.setExplainConfig(config);
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
index e9ccfd2..9f6336a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
@@ -342,6 +342,11 @@ KW_REPL: 'REPL';
 KW_DUMP: 'DUMP';
 KW_BATCH: 'BATCH';
 KW_STATUS: 'STATUS';
+KW_VECTORIZATION: 'VECTORIZATION';
+KW_SUMMARY: 'SUMMARY';
+KW_OPERATOR: 'OPERATOR';
+KW_EXPRESSION: 'EXPRESSION';
+KW_DETAIL: 'DETAIL';
 
 // Operators
 // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work.
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 918169a..cf8114e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -395,6 +395,11 @@ TOK_REPL_LOAD;
 TOK_REPL_STATUS;
 TOK_BATCH;
 TOK_TO;
+TOK_ONLY;
+TOK_SUMMARY;
+TOK_OPERATOR;
+TOK_EXPRESSION;
+TOK_DETAIL;
 }
 
@@ -734,7 +739,28 @@ explainStatement
 explainOption
 @init { msgs.push("explain option"); }
 @after { msgs.pop(); }
-    : KW_EXTENDED|KW_FORMATTED|KW_DEPENDENCY|KW_LOGICAL|KW_AUTHORIZATION|KW_ANALYZE
+    : KW_EXTENDED|KW_FORMATTED|KW_DEPENDENCY|KW_LOGICAL|KW_AUTHORIZATION|KW_ANALYZE|
+      (KW_VECTORIZATION vectorizationOnly? vectorizationDetail?)
+    ;
+
+vectorizationOnly
+@init { pushMsg("vectorization's only clause", state); }
+@after { popMsg(state); }
+    : KW_ONLY
+    -> ^(TOK_ONLY)
+    ;
+
+vectorizationDetail
+@init { pushMsg("vectorization's detail level clause", state); }
+@after { popMsg(state); }
+    : KW_SUMMARY
+    -> ^(TOK_SUMMARY)
+    | KW_OPERATOR
+    -> ^(TOK_OPERATOR)
+    | KW_EXPRESSION
+    -> ^(TOK_EXPRESSION)
+    | KW_DETAIL
+    -> ^(TOK_DETAIL)
     ;
 
 execStatement
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 43fd69e..1d8c3e0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -749,6 +749,12 @@ nonReserved
     | KW_MATCHED | KW_REPL | KW_DUMP | KW_BATCH | KW_STATUS
     | KW_CACHE | KW_DAYOFWEEK | KW_VIEWS
+    | KW_VECTORIZATION
+    | KW_SUMMARY
+    | KW_OPERATOR
+    | KW_EXPRESSION
+    | KW_DETAIL
+
     ;
 
 //The following SQL2011 reserved keywords are used as function name only, but not as identifiers.
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
index 7df9d07..fd46aba 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
@@ -29,6 +29,10 @@
 public class AbstractOperatorDesc implements OperatorDesc {
 
   protected boolean vectorMode = false;
+
+  // Extra parameters only for vectorization.
+  protected VectorDesc vectorDesc;
+
   protected Statistics statistics;
   protected transient OpTraits opTraits;
   protected transient Map<String, String> opProps;
@@ -65,6 +69,14 @@ public void setVectorMode(boolean vm) {
     this.vectorMode = vm;
   }
 
+  public void setVectorDesc(VectorDesc vectorDesc) {
+    this.vectorDesc = vectorDesc;
+  }
+
+  public VectorDesc getVectorDesc() {
+    return vectorDesc;
+  }
+
   @Override
   public OpTraits getTraits() {
     return opTraits;
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java
index 5157ebd..e85a418 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java
@@ -18,10 +18,24 @@
 
 package org.apache.hadoop.hive.ql.plan;
 
+import org.apache.hadoop.hive.ql.exec.Operator;
+
 public class AbstractVectorDesc implements VectorDesc {
 
+  private static final long serialVersionUID = 1L;
+
+  private Class<?> vectorOpClass;
+
   @Override
-  public Object clone() throws CloneNotSupportedException {
-    throw new CloneNotSupportedException("clone not supported");
+  public Object clone() {
+    throw new RuntimeException("clone not supported");
+  }
+
+  public void setVectorOp(Class<?> vectorOpClass) {
+    this.vectorOpClass = vectorOpClass;
+  }
+
+  public Class<?> getVectorOpClass() {
+    return vectorOpClass;
   }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java
index 264f959..c5294f0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java
@@ -19,7 +19,10 @@
 package org.apache.hadoop.hive.ql.plan;
 
 import java.io.IOException;
+import java.util.List;
 
+import org.apache.hadoop.hive.ql.plan.Explain.Level;
+import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
 import org.apache.hadoop.io.DataOutputBuffer;
 
@@ -60,4 +63,25 @@ public void setTable(TableDesc table) {
   public void writeEventHeader(DataOutputBuffer buffer) throws IOException {
     // nothing to add
   }
+
+  public class AppMasterEventOperatorExplainVectorization extends OperatorExplainVectorization {
+
+    private final AppMasterEventDesc appMasterEventDesc;
+    private final VectorAppMasterEventDesc vectorAppMasterEventDesc;
+
+    public AppMasterEventOperatorExplainVectorization(AppMasterEventDesc appMasterEventDesc, VectorDesc vectorDesc) {
+      // Native vectorization supported.
+      super(vectorDesc, true);
+      this.appMasterEventDesc = appMasterEventDesc;
+      vectorAppMasterEventDesc = (VectorAppMasterEventDesc) vectorDesc;
+    }
+  }
+
+  @Explain(vectorization = Vectorization.OPERATOR, displayName = "App Master Event Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+  public AppMasterEventOperatorExplainVectorization getAppMasterEventVectorization() {
+    if (vectorDesc == null) {
+      return null;
+    }
+    return new AppMasterEventOperatorExplainVectorization(this, vectorDesc);
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
index 8c341fc..286ee3b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.plan;
 
+import java.util.Arrays;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.LinkedList;
@@ -35,7 +36,9 @@
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
 import org.apache.hadoop.hive.ql.parse.RuntimeValuesInfo;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
+import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
@@ -67,12 +70,25 @@ public BaseWork(String name) {
 
   private String name;
 
-  // Vectorization.
+  /*
+   * Vectorization.
+   */
+
+  // This will be true if a node was examined by the Vectorizer class.
+  protected boolean vectorizationExamined;
+
+  protected boolean vectorizationEnabled;
 
   protected VectorizedRowBatchCtx vectorizedRowBatchCtx;
 
   protected boolean useVectorizedInputFileFormat;
 
+  private VectorizerReason notVectorizedReason;
+
+  private boolean groupByVectorOutput;
+  private boolean allNative;
+  private boolean usesVectorUDFAdaptor;
+
   protected boolean llapMode = false;
   protected boolean uberMode = false;
 
@@ -169,6 +185,22 @@ public void addDummyOp(HashTableDummyOperator dummyOp) {
 
   // -----------------------------------------------------------------------------------------------
 
+  public void setVectorizationExamined(boolean vectorizationExamined) {
+    this.vectorizationExamined = vectorizationExamined;
+  }
+
+  public boolean getVectorizationExamined() {
+    return vectorizationExamined;
+  }
+
+  public void setVectorizationEnabled(boolean vectorizationEnabled) {
+    this.vectorizationEnabled = vectorizationEnabled;
+  }
+
+  public boolean getVectorizationEnabled() {
+    return vectorizationEnabled;
+  }
+
   /*
    * The vectorization context for creating the VectorizedRowBatch for the node.
    */
@@ -180,23 +212,160 @@ public void setVectorizedRowBatchCtx(VectorizedRowBatchCtx vectorizedRowBatchCtx
     this.vectorizedRowBatchCtx = vectorizedRowBatchCtx;
   }
 
-  /*
-   * Whether the HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT variable
-   * (hive.vectorized.use.vectorized.input.format) was true when the Vectorizer class evaluated
-   * vectorizing this node.
-   *
-   * When Vectorized Input File Format looks at this flag, it can determine whether it should
-   * operate vectorized or not.  In some modes, the node can be vectorized but use row
-   * serialization.
-   */
-  public void setUseVectorizedInputFileFormat(boolean useVectorizedInputFileFormat) {
-    this.useVectorizedInputFileFormat = useVectorizedInputFileFormat;
+  public void setNotVectorizedReason(VectorizerReason notVectorizedReason) {
+    this.notVectorizedReason = notVectorizedReason;
+  }
+
+  public VectorizerReason getNotVectorizedReason() {
+    return notVectorizedReason;
+  }
+
+  public void setGroupByVectorOutput(boolean groupByVectorOutput) {
+    this.groupByVectorOutput = groupByVectorOutput;
+  }
+
+  public boolean getGroupByVectorOutput() {
+    return groupByVectorOutput;
+  }
+
+  public void setUsesVectorUDFAdaptor(boolean usesVectorUDFAdaptor) {
+    this.usesVectorUDFAdaptor = usesVectorUDFAdaptor;
   }
 
-  public boolean getUseVectorizedInputFileFormat() {
-    return useVectorizedInputFileFormat;
+  public boolean getUsesVectorUDFAdaptor() {
+    return usesVectorUDFAdaptor;
   }
 
+  public void setAllNative(boolean allNative) {
+    this.allNative = allNative;
+  }
+
+  public boolean getAllNative() {
+    return allNative;
+  }
+
+  public static class BaseExplainVectorization {
+
+    private final BaseWork baseWork;
+
+    public BaseExplainVectorization(BaseWork baseWork) {
+      this.baseWork = baseWork;
+    }
+
+    @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabled", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+    public boolean enabled() {
+      return baseWork.getVectorizationEnabled();
+    }
+
+    @Explain(vectorization = Vectorization.SUMMARY, displayName = "vectorized", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+    public Boolean vectorized() {
+      if (!baseWork.getVectorizationEnabled()) {
+        return null;
+      }
+      return baseWork.getVectorMode();
+    }
+
+    @Explain(vectorization = Vectorization.SUMMARY, displayName = "notVectorizedReason", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+    public String notVectorizedReason() {
+      if (!baseWork.getVectorizationEnabled() || baseWork.getVectorMode()) {
+        return null;
+      }
+      VectorizerReason notVectorizedReason = baseWork.getNotVectorizedReason();
+      if (notVectorizedReason == null) {
+        return "Unknown";
+      }
+      return notVectorizedReason.toString();
+    }
+
+    @Explain(vectorization = Vectorization.SUMMARY, displayName = "groupByVectorOutput", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+    public Boolean groupByRowOutputCascade() {
+      if (!baseWork.getVectorMode()) {
+        return null;
+      }
+      return baseWork.getGroupByVectorOutput();
+    }
+
+    @Explain(vectorization = Vectorization.SUMMARY, displayName = "allNative", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+    public Boolean nativeVectorized() {
+      if (!baseWork.getVectorMode()) {
+        return null;
+      }
+      return baseWork.getAllNative();
+    }
+
+    @Explain(vectorization = Vectorization.SUMMARY, displayName = "usesVectorUDFAdaptor", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+    public Boolean usesVectorUDFAdaptor() {
+      if (!baseWork.getVectorMode()) {
+        return null;
+      }
+      return baseWork.getUsesVectorUDFAdaptor();
+    }
+
+    public static class RowBatchContextExplainVectorization {
+
+      private final VectorizedRowBatchCtx vectorizedRowBatchCtx;
+
+      public RowBatchContextExplainVectorization(VectorizedRowBatchCtx vectorizedRowBatchCtx) {
+        this.vectorizedRowBatchCtx = vectorizedRowBatchCtx;
+      }
+
+      private List<String> getColumns(int startIndex, int count) {
+        String[] rowColumnNames = vectorizedRowBatchCtx.getRowColumnNames();
+        TypeInfo[] rowColumnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos();
+        List<String> result = new ArrayList<String>(count);
+        final int end = startIndex + count;
+        for (int i = startIndex; i < end; i++) {
+          result.add(rowColumnNames[i] + ":" + rowColumnTypeInfos[i]);
+        }
+        return result;
+      }
+
+      @Explain(vectorization = Vectorization.DETAIL, displayName = "dataColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+      public List<String> getDataColumns() {
+        return getColumns(0, vectorizedRowBatchCtx.getDataColumnCount());
+      }
+
+      @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+      public List<String> getPartitionColumns() {
+        return getColumns(vectorizedRowBatchCtx.getDataColumnCount(), vectorizedRowBatchCtx.getPartitionColumnCount());
+      }
+
+      @Explain(vectorization = Vectorization.DETAIL, displayName = "includeColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+      public String getDataColumnNums() {
+        int[] dataColumnNums = vectorizedRowBatchCtx.getDataColumnNums();
+        if (dataColumnNums == null) {
+          return null;
+        }
+        return Arrays.toString(dataColumnNums);
+      }
+
+      @Explain(vectorization = Vectorization.DETAIL, displayName = "dataColumnCount", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+      public int getDataColumnCount() {
+        return vectorizedRowBatchCtx.getDataColumnCount();
+      }
+
+      @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumnCount", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+      public int getPartitionColumnCount() {
+        return vectorizedRowBatchCtx.getPartitionColumnCount();
+      }
+
+      @Explain(vectorization = Vectorization.DETAIL, displayName = "scratchColumnTypeNames", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+      public List<String> getScratchColumnTypeNames() {
+        return Arrays.asList(vectorizedRowBatchCtx.getScratchColumnTypeNames());
+      }
+
+    }
+
+    @Explain(vectorization = Vectorization.DETAIL, displayName = "rowBatchContext", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+    public RowBatchContextExplainVectorization vectorizedRowBatchContext() {
+      if (!baseWork.getVectorMode()) {
+        return null;
+      }
+      return new RowBatchContextExplainVectorization(baseWork.getVectorizedRowBatchCtx());
+    }
+  }
+
   // -----------------------------------------------------------------------------------------------
 
   /**
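Assuming the annotations above, a vectorized work node's SUMMARY section in the EXPLAIN
output would look roughly like the following (hypothetical values; the exact layout is up to
the explain formatter):

    Map Vectorization:
        enabled: true
        vectorized: true
        groupByVectorOutput: true
        allNative: false
        usesVectorUDFAdaptor: false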
And, MapWork.getAllRootOperators is marked OPERATOR_PATH + // because we only display operator information for OPERATOR. + // + // EXPRESSION and DETAIL typically live inside SUMMARY or OPERATOR classes. + // + public enum Vectorization { + SUMMARY_PATH(4), OPERATOR_PATH(3), + SUMMARY(4), OPERATOR(3), EXPRESSION(2), DETAIL(1), + NON_VECTORIZED(Integer.MAX_VALUE); + + public final int rank; + Vectorization(int rank) { + this.rank = rank; + } + }; + Vectorization vectorization() default Vectorization.NON_VECTORIZED; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java index 9f4767c..805357c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration; +import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.VectorizationDetailLevel; import org.apache.hadoop.hive.ql.parse.ParseContext; /** @@ -117,6 +118,18 @@ public boolean isFormatted() { return config.isFormatted(); } + public boolean isVectorization() { + return config.isVectorization(); + } + + public boolean isVectorizationOnly() { + return config.isVectorizationOnly(); + } + + public VectorizationDetailLevel isVectorizationDetailLevel() { + return config.getVectorizationDetailLevel(); + } + public ParseContext getParseContext() { return pCtx; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java index 4430107..beb6689 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Set; import java.util.TreeMap; import org.apache.hadoop.fs.Path; @@ -30,14 +31,17 @@ import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.parse.SplitSample; +import org.apache.hadoop.hive.ql.plan.BaseWork.BaseExplainVectorization; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; /** * FetchWork. 
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
index 9f4767c..805357c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
@@ -28,6 +28,7 @@
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.ExplainConfiguration;
+import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.VectorizationDetailLevel;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 
 /**
@@ -117,6 +118,18 @@ public boolean isFormatted() {
     return config.isFormatted();
   }
 
+  public boolean isVectorization() {
+    return config.isVectorization();
+  }
+
+  public boolean isVectorizationOnly() {
+    return config.isVectorizationOnly();
+  }
+
+  public VectorizationDetailLevel isVectorizationDetailLevel() {
+    return config.getVectorizationDetailLevel();
+  }
+
   public ParseContext getParseContext() {
     return pCtx;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java
index 4430107..beb6689 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java
@@ -22,6 +22,7 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Set;
 import java.util.TreeMap;
 
 import org.apache.hadoop.fs.Path;
@@ -30,14 +31,17 @@
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorFactory;
 import org.apache.hadoop.hive.ql.parse.SplitSample;
+import org.apache.hadoop.hive.ql.plan.BaseWork.BaseExplainVectorization;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
+import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 
 /**
  * FetchWork.
  *
 */
-@Explain(displayName = "Fetch Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
+@Explain(displayName = "Fetch Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED },
+    vectorization = Vectorization.SUMMARY_PATH)
 public class FetchWork implements Serializable {
   private static final long serialVersionUID = 1L;
 
@@ -321,4 +325,43 @@ public String toString() {
 
     return ret;
   }
+
+  // -----------------------------------------------------------------------------------------------
+
+  private boolean vectorizationExamined;
+
+  public void setVectorizationExamined(boolean vectorizationExamined) {
+    this.vectorizationExamined = vectorizationExamined;
+  }
+
+  public boolean getVectorizationExamined() {
+    return vectorizationExamined;
+  }
+
+  public class FetchExplainVectorization {
+
+    private final FetchWork fetchWork;
+
+    public FetchExplainVectorization(FetchWork fetchWork) {
+      this.fetchWork = fetchWork;
+    }
+
+    @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabled", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+    public boolean enabled() {
+      return false;
+    }
+
+    @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabledConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+    public List<String> enabledConditionsNotMet() {
+      return VectorizationCondition.getConditionsSupported(false);
+    }
+  }
+
+  @Explain(vectorization = Vectorization.SUMMARY, displayName = "Fetch Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+  public FetchExplainVectorization getMapExplainVectorization() {
+    if (!getVectorizationExamined()) {
+      return null;
+    }
+    return new FetchExplainVectorization(this);
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java
index 4d9139b..5cc1c45 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java
@@ -25,7 +25,7 @@
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
-
+import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
 
 /**
  * FileSinkDesc.
@@ -488,4 +488,19 @@ public void setStatsTmpDir(String statsCollectionTempDir) {
     this.statsTmpDir = statsCollectionTempDir;
   }
 
+  public class FileSinkOperatorExplainVectorization extends OperatorExplainVectorization {
+
+    public FileSinkOperatorExplainVectorization(VectorDesc vectorDesc) {
+      // Native vectorization not supported.
+      super(vectorDesc, false);
+    }
+  }
+
+  @Explain(vectorization = Vectorization.OPERATOR, displayName = "File Sink Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+  public FileSinkOperatorExplainVectorization getFileSinkVectorization() {
+    if (vectorDesc == null) {
+      return null;
+    }
+    return new FileSinkOperatorExplainVectorization(vectorDesc);
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java
index fa20798..e93660a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java
@@ -22,6 +22,7 @@
 import java.util.List;
 
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
+import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
 
 
@@ -186,4 +187,30 @@ public Object clone() {
     filterDesc.setSortedFilter(isSortedFilter());
     return filterDesc;
   }
+
+  public class FilterOperatorExplainVectorization extends OperatorExplainVectorization {
+
+    private final FilterDesc filterDesc;
+    private final VectorFilterDesc vectorFilterDesc;
+
+    public FilterOperatorExplainVectorization(FilterDesc filterDesc, VectorDesc vectorDesc) {
+      // Native vectorization supported.
+      super(vectorDesc, true);
+      this.filterDesc = filterDesc;
+      vectorFilterDesc = (VectorFilterDesc) vectorDesc;
+    }
+
+    @Explain(vectorization = Vectorization.EXPRESSION, displayName = "predicateExpression", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+    public String getPredicateExpression() {
+      return vectorFilterDesc.getPredicateExpression().toString();
+    }
+  }
+
+  @Explain(vectorization = Vectorization.OPERATOR, displayName = "Filter Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+  public FilterOperatorExplainVectorization getFilterVectorization() {
+    if (vectorDesc == null) {
+      return null;
+    }
+    return new FilterOperatorExplainVectorization(this, vectorDesc);
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java
index 99791e5..0b49294 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java
@@ -19,13 +19,18 @@
 package org.apache.hadoop.hive.ql.plan;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 
-import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
 import org.apache.hadoop.hive.ql.udf.UDFType;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
 import org.apache.hive.common.util.AnnotationUtils;
+import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
+import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 
 /**
@@ -71,9 +76,6 @@
   transient private boolean isDistinct;
   private boolean dontResetAggrsDistinct;
 
-  // Extra parameters only for vectorization.
-  private VectorGroupByDesc vectorDesc;
-
   public GroupByDesc() {
     vectorDesc = new VectorGroupByDesc();
   }
@@ -120,14 +122,6 @@ public GroupByDesc(
     this.isDistinct = isDistinct;
   }
 
-  public void setVectorDesc(VectorGroupByDesc vectorDesc) {
-    this.vectorDesc = vectorDesc;
-  }
-
-  public VectorGroupByDesc getVectorDesc() {
-    return vectorDesc;
-  }
-
   public Mode getMode() {
     return mode;
   }
@@ -311,4 +305,66 @@ public void setDistinct(boolean isDistinct) {
     this.isDistinct = isDistinct;
   }
 
+  public class GroupByOperatorExplainVectorization extends OperatorExplainVectorization {
+
+    private final GroupByDesc groupByDesc;
+    private final VectorGroupByDesc vectorGroupByDesc;
+
+    public GroupByOperatorExplainVectorization(GroupByDesc groupByDesc, VectorDesc vectorDesc) {
+      // Native vectorization not supported.
+      super(vectorDesc, false);
+      this.groupByDesc = groupByDesc;
+      vectorGroupByDesc = (VectorGroupByDesc) vectorDesc;
+    }
+
+    @Explain(vectorization = Vectorization.EXPRESSION, displayName = "keyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+    public List<String> getKeysExpression() {
+      return vectorExpressionsToStringList(vectorGroupByDesc.getKeyExpressions());
+    }
+
+    @Explain(vectorization = Vectorization.EXPRESSION, displayName = "aggregators", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+    public List<String> getAggregators() {
+      VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators();
+      List<String> vecAggrList = new ArrayList<String>(vecAggregators.length);
+      for (VectorAggregateExpression vecAggr : vecAggregators) {
+        vecAggrList.add(vecAggr.toString());
+      }
+      return vecAggrList;
+    }
+
+    @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorOutput", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+    public boolean getGroupByRowOutputCascade() {
+      return vectorGroupByDesc.isVectorOutput();
+    }
+
+    @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorOutputConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+    public List<String> getVectorOutputConditionsNotMet() {
+      List<String> results = new ArrayList<String>();
+      VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators();
+      for (VectorAggregateExpression vecAggr : vecAggregators) {
+        Category category = Vectorizer.aggregationOutputCategory(vecAggr);
+        if (category != ObjectInspector.Category.PRIMITIVE) {
+          results.add(
+              "Vector output of " + vecAggr.toString() + " output type " + category + " requires PRIMITIVE IS false");
+        }
+      }
+      if (results.size() == 0) {
+        return null;
+      }
+      return results;
+    }
+
+    @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+    public String getProjectedOutputColumns() {
+      return Arrays.toString(vectorGroupByDesc.getProjectedOutputColumns());
+    }
+  }
+
+  @Explain(vectorization = Vectorization.OPERATOR, displayName = "Group By Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+  public GroupByOperatorExplainVectorization getGroupByVectorization() {
+    if (vectorDesc == null) {
+      return null;
+    }
+    return new GroupByOperatorExplainVectorization(this, vectorDesc);
+  }
 }
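For instance, a GROUP BY whose aggregator output category is not PRIMITIVE (say, a
struct-typed intermediate) would surface in its operator section along these lines
(hypothetical aggregator name):

    vectorOutputConditionsNotMet:
        Vector output of VectorUDAFAvgDouble(col 1) output type STRUCT requires PRIMITIVE IS false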
org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** @@ -73,4 +75,19 @@ public void setLeastRows(int leastRows) { this.leastRows = leastRows; } + public class LimitOperatorExplainVectorization extends OperatorExplainVectorization { + + public LimitOperatorExplainVectorization(LimitDesc limitDesc, VectorDesc vectorDesc) { + // Native vectorization supported. + super(vectorDesc, true); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Limit Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public LimitOperatorExplainVectorization getLimitVectorization() { + if (vectorDesc == null) { + return null; + } + return new LimitOperatorExplainVectorization(this, vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java index ec35860..ca69697 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java @@ -20,14 +20,25 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.OperatorVariation; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** * Map Join operator Descriptor implementation. @@ -73,17 +84,16 @@ private boolean isHybridHashJoin; private boolean isDynamicPartitionHashJoin = false; - // Extra parameters only for vectorization. 
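The MapJoinDesc constructors below drop the eagerly constructed VectorMapJoinDesc in favor of a null default, deep-cloning only when a descriptor is present. A hedged sketch of the resulting lifecycle; a generic setVectorDesc on the base desc is an assumption here, since this patch only removes the typed per-class accessors.

```java
// Hedged lifecycle sketch; setVectorDesc on the base desc is assumed.
MapJoinDesc desc = new MapJoinDesc();           // vectorDesc starts out null
assert desc.getMapJoinVectorization() == null;  // no EXPLAIN section yet

desc.setVectorDesc(new VectorMapJoinDesc());    // set by the Vectorizer on success
MapJoinDesc copy = new MapJoinDesc(desc);       // copy ctor deep-clones vectorDesc
```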
- private VectorMapJoinDesc vectorDesc; - public MapJoinDesc() { - vectorDesc = new VectorMapJoinDesc(); + vectorDesc = null; bigTableBucketNumMapping = new LinkedHashMap(); } public MapJoinDesc(MapJoinDesc clone) { super(clone); - vectorDesc = new VectorMapJoinDesc(clone.vectorDesc); + if (clone.vectorDesc != null) { + vectorDesc = (VectorDesc) clone.vectorDesc.clone(); + } this.keys = clone.keys; this.keyTblDesc = clone.keyTblDesc; this.valueTblDescs = clone.valueTblDescs; @@ -108,7 +118,7 @@ public MapJoinDesc(final Map> keys, final int posBigTable, final JoinCondDesc[] conds, final Map> filters, boolean noOuterJoin, String dumpFilePrefix) { super(values, outputColumnNames, noOuterJoin, conds, filters, null); - vectorDesc = new VectorMapJoinDesc(); + vectorDesc = null; this.keys = keys; this.keyTblDesc = keyTblDesc; this.valueTblDescs = valueTblDescs; @@ -119,14 +129,6 @@ public MapJoinDesc(final Map> keys, initRetainExprList(); } - public void setVectorDesc(VectorMapJoinDesc vectorDesc) { - this.vectorDesc = vectorDesc; - } - - public VectorMapJoinDesc getVectorDesc() { - return vectorDesc; - } - private void initRetainExprList() { retainList = new HashMap>(); Set>> set = super.getExprs().entrySet(); @@ -388,4 +390,204 @@ public boolean isDynamicPartitionHashJoin() { public void setDynamicPartitionHashJoin(boolean isDistributedHashJoin) { this.isDynamicPartitionHashJoin = isDistributedHashJoin; } + + // Use LinkedHashSet to give predictable display order. + private static Set vectorizableMapJoinNativeEngines = + new LinkedHashSet(Arrays.asList("tez", "spark")); + + public class MapJoinOperatorExplainVectorization extends OperatorExplainVectorization { + + private final MapJoinDesc mapJoinDesc; + private final VectorMapJoinDesc vectorMapJoinDesc; + private final VectorMapJoinInfo vectorMapJoinInfo; + + private VectorizationCondition[] nativeConditions; + + public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, VectorDesc vectorDesc) { + // VectorMapJoinOperator is not native vectorized. 
+ super(vectorDesc, ((VectorMapJoinDesc) vectorDesc).hashTableImplementationType() != HashTableImplementationType.NONE); + this.mapJoinDesc = mapJoinDesc; + vectorMapJoinDesc = (VectorMapJoinDesc) vectorDesc; + vectorMapJoinInfo = vectorMapJoinDesc.getVectorMapJoinInfo(); + } + + private VectorizationCondition[] createNativeConditions() { + + boolean enabled = vectorMapJoinDesc.getIsVectorizationMapJoinNativeEnabled(); + + String engine = vectorMapJoinDesc.getEngine(); + String engineInSupportedCondName = + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableMapJoinNativeEngines; + boolean engineInSupported = vectorizableMapJoinNativeEngines.contains(engine); + + boolean isFastHashTableEnabled = vectorMapJoinDesc.getIsFastHashTableEnabled(); + + List conditionList = new ArrayList(); + conditionList.add( + new VectorizationCondition( + vectorMapJoinDesc.getUseOptimizedTable(), + HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE.varname)); + conditionList.add( + new VectorizationCondition( + enabled, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED.varname)); + conditionList.add( + new VectorizationCondition( + engineInSupported, + engineInSupportedCondName)); + conditionList.add( + new VectorizationCondition( + vectorMapJoinDesc.getOneMapJoinCondition(), + "One MapJoin Condition")); + conditionList.add( + new VectorizationCondition( + !vectorMapJoinDesc.getHasNullSafes(), + "No nullsafe")); + conditionList.add( + new VectorizationCondition( + vectorMapJoinDesc.getSmallTableExprVectorizes(), + "Small table vectorizes")); + + if (isFastHashTableEnabled) { + conditionList.add( + new VectorizationCondition( + !vectorMapJoinDesc.getIsHybridHashJoin(), + "Fast Hash Table and No Hybrid Hash Join")); + } else { + conditionList.add( + new VectorizationCondition( + vectorMapJoinDesc.getSupportsKeyTypes(), + "Optimized Table and Supports Key Types")); + } + + VectorizationCondition[] conditions = + conditionList.toArray(new VectorizationCondition[0]); + + return conditions; + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsMet() { + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsMet(nativeConditions); + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsNotMet() { + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsNotMet(nativeConditions); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableKeyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBigTableKeyExpressions() { + if (!isNative) { + return null; + } + return vectorExpressionsToStringList(vectorMapJoinInfo.getBigTableKeyExpressions()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableKeyColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getBigTableKeyColumns() { + if (!isNative) { + return null; + } + int[] bigTableKeyColumnMap = vectorMapJoinInfo.getBigTableKeyColumnMap(); + if (bigTableKeyColumnMap.length == 0) { + return null; + } + return Arrays.toString(bigTableKeyColumnMap); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableValueExpressions", 
explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBigTableValueExpressions() { + if (!isNative) { + return null; + } + return vectorExpressionsToStringList(vectorMapJoinInfo.getBigTableValueExpressions()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableValueColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getBigTableValueColumns() { + if (!isNative) { + return null; + } + int[] bigTableValueColumnMap = vectorMapJoinInfo.getBigTableValueColumnMap(); + if (bigTableValueColumnMap.length == 0) { + return null; + } + return Arrays.toString(bigTableValueColumnMap); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "smallTableMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getSmallTableColumns() { + if (!isNative) { + return null; + } + return outputColumnsToStringList(vectorMapJoinInfo.getSmallTableMapping()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "projectedOutputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getProjectedOutputColumns() { + if (!isNative) { + return null; + } + return outputColumnsToStringList(vectorMapJoinInfo.getProjectionMapping()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableOuterKeyMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBigTableOuterKey() { + if (!isNative || vectorMapJoinDesc.operatorVariation() != OperatorVariation.OUTER) { + return null; + } + return columnMappingToStringList(vectorMapJoinInfo.getBigTableOuterKeyMapping()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableRetainedColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getBigTableRetainedColumns() { + if (!isNative) { + return null; + } + return outputColumnsToStringList(vectorMapJoinInfo.getBigTableRetainedMapping()); + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeNotSupportedKeyTypes", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeNotSupportedKeyTypes() { + return vectorMapJoinDesc.getNotSupportedKeyTypes(); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Map Join Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public MapJoinOperatorExplainVectorization getMapJoinVectorization() { + if (vectorDesc == null || this instanceof SMBJoinDesc) { + return null; + } + return new MapJoinOperatorExplainVectorization(this, vectorDesc); + } + + public class SMBJoinOperatorExplainVectorization extends OperatorExplainVectorization { + + private final SMBJoinDesc smbJoinDesc; + private final VectorSMBJoinDesc vectorSMBJoinDesc; + + public SMBJoinOperatorExplainVectorization(SMBJoinDesc smbJoinDesc, VectorDesc vectorDesc) { + // Native vectorization NOT supported. + super(vectorDesc, false); + this.smbJoinDesc = smbJoinDesc; + vectorSMBJoinDesc = (VectorSMBJoinDesc) vectorDesc; + } + } + + // Handle dual nature. 
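Because SMBJoinDesc extends MapJoinDesc, both explain getters live on MapJoinDesc, and the instanceof guards ensure exactly one of the two sections renders for a given operator. An illustrative condensation of that dispatch; the helper is hypothetical, and getVectorDesc() as a plain base-class accessor is assumed.

```java
// Illustrative only; not part of the patch.
final class DualNatureSketch {
  static String vectorizationSectionFor(MapJoinDesc desc) {
    if (desc.getVectorDesc() == null) {
      return null;                       // operator was never vectorized
    }
    return (desc instanceof SMBJoinDesc)
        ? "SMB Map Join Vectorization"   // wraps VectorSMBJoinDesc, never native
        : "Map Join Vectorization";      // native iff hash table impl != NONE
  }
}
```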
+ @Explain(vectorization = Vectorization.OPERATOR, displayName = "SMB Map Join Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public SMBJoinOperatorExplainVectorization getSMBJoinVectorization() { + if (vectorDesc == null || !(this instanceof SMBJoinDesc)) { + return null; + } + return new SMBJoinOperatorExplainVectorization((SMBJoinDesc) this, vectorDesc); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java index 601324a..d4bdd96 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java @@ -26,6 +26,7 @@ import java.util.BitSet; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; @@ -47,8 +48,10 @@ import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol; import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol; +import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason; import org.apache.hadoop.hive.ql.parse.SplitSample; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -140,6 +143,12 @@ private VectorizedRowBatch vectorizedRowBatch; + private VectorizerReason notEnabledInputFileFormatReason; + + private Set vectorizationInputFileFormatClassNameSet; + private List vectorizationEnabledConditionsMet; + private List vectorizationEnabledConditionsNotMet; + // bitsets can't be correctly serialized by Kryo's default serializer // BitSet::wordsInUse is transient, so force dumping into a lower form private byte[] includedBuckets; @@ -362,7 +371,7 @@ public void setAliasToWork( return nameToSplitSample; } - @Explain(displayName = "LLAP IO") + @Explain(displayName = "LLAP IO", vectorization = Vectorization.SUMMARY_PATH) public String getLlapIoDesc() { return llapIoDesc; } @@ -434,7 +443,8 @@ private void setAliases() { } } - @Explain(displayName = "Execution mode", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Execution mode", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public String getExecutionMode() { if (vectorMode) { if (llapMode) { @@ -464,7 +474,8 @@ public void replaceRoots(Map, Operator> replacementMap) { } @Override - @Explain(displayName = "Map Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Map Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.OPERATOR_PATH) public Set> getAllRootOperators() { Set> opSet = new LinkedHashSet>(); @@ -721,4 +732,86 @@ public void setVectorizedRowBatch(VectorizedRowBatch vectorizedRowBatch) { public VectorizedRowBatch getVectorizedRowBatch() { return vectorizedRowBatch; } + + /* + * Whether the HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT variable + * (hive.vectorized.use.vectorized.input.format) was true when the Vectorizer class evaluated + * vectorizing this node. 
+ * + * When Vectorized Input File Format looks at this flag, it can determine whether it should + * operate vectorized or not. In some modes, the node can be vectorized but use row + * serialization. + */ + public void setUseVectorizedInputFileFormat(boolean useVectorizedInputFileFormat) { + this.useVectorizedInputFileFormat = useVectorizedInputFileFormat; + } + + public boolean getUseVectorizedInputFileFormat() { + return useVectorizedInputFileFormat; + } + + public void setNotEnabledInputFileFormatReason(VectorizerReason notEnabledInputFileFormatReason) { + this.notEnabledInputFileFormatReason = notEnabledInputFileFormatReason; + } + + public VectorizerReason getNotEnabledInputFileFormatReason() { + return notEnabledInputFileFormatReason; + } + + public void setVectorizationInputFileFormatClassNameSet(Set vectorizationInputFileFormatClassNameSet) { + this.vectorizationInputFileFormatClassNameSet = vectorizationInputFileFormatClassNameSet; + } + + public Set getVectorizationInputFileFormatClassNameSet() { + return vectorizationInputFileFormatClassNameSet; + } + + public void setVectorizationEnabledConditionsMet(ArrayList vectorizationEnabledConditionsMet) { + this.vectorizationEnabledConditionsMet = VectorizationCondition.addBooleans(vectorizationEnabledConditionsMet, true); + } + + public List getVectorizationEnabledConditionsMet() { + return vectorizationEnabledConditionsMet; + } + + public void setVectorizationEnabledConditionsNotMet(List vectorizationEnabledConditionsNotMet) { + this.vectorizationEnabledConditionsNotMet = VectorizationCondition.addBooleans(vectorizationEnabledConditionsNotMet, false); + } + + public List getVectorizationEnabledConditionsNotMet() { + return vectorizationEnabledConditionsNotMet; + } + + public class MapExplainVectorization extends BaseExplainVectorization { + + private final MapWork mapWork; + + public MapExplainVectorization(MapWork mapWork) { + super(mapWork); + this.mapWork = mapWork; + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "inputFileFormats", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public Set inputFileFormats() { + return mapWork.getVectorizationInputFileFormatClassNameSet(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabledConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List enabledConditionsMet() { + return mapWork.getVectorizationEnabledConditionsMet(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabledConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List enabledConditionsNotMet() { + return mapWork.getVectorizationEnabledConditionsNotMet(); + } + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "Map Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public MapExplainVectorization getMapExplainVectorization() { + if (!getVectorizationExamined()) { + return null; + } + return new MapExplainVectorization(this); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java index 82143a6..76b5138 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java @@ -32,13 +32,15 @@ import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** * 
MapredLocalWork. * */ -@Explain(displayName = "Map Reduce Local Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) +@Explain(displayName = "Map Reduce Local Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public class MapredLocalWork implements Serializable { private static final long serialVersionUID = 1L; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java index aa7f6ed..af9adc2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java @@ -24,14 +24,15 @@ import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.hive.ql.plan.Explain.Level; - +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** * MapredWork. * */ -@Explain(displayName = "Map Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) +@Explain(displayName = "Map Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public class MapredWork extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; @@ -40,7 +41,8 @@ private boolean finalMapRed; - @Explain(skipHeader = true, displayName = "Map", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(skipHeader = true, displayName = "Map", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public MapWork getMapWork() { return mapWork; } @@ -49,7 +51,8 @@ public void setMapWork(MapWork mapWork) { this.mapWork = mapWork; } - @Explain(skipHeader = true, displayName = "Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(skipHeader = true, displayName = "Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public ReduceWork getReduceWork() { return reduceWork; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java new file mode 100644 index 0000000..bdf9859 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnMapping; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +public class OperatorExplainVectorization { + + protected final VectorDesc vectorDesc; + + protected final boolean isNative; + + public OperatorExplainVectorization(VectorDesc vectorDesc, boolean isNative) { + this.vectorDesc = vectorDesc; + this.isNative = isNative; + } + + public List vectorExpressionsToStringList(VectorExpression[] vectorExpressions) { + if (vectorExpressions == null) { + return null; + } + List vecExprList = new ArrayList(vectorExpressions.length); + for (VectorExpression vecExpr : vectorExpressions) { + vecExprList.add(vecExpr.toString()); + } + return vecExprList; + } + + public String outputColumnsToStringList(VectorColumnMapping vectorColumnMapping) { + final int size = vectorColumnMapping.getCount(); + if (size == 0) { + return null; + } + int[] outputColumns = vectorColumnMapping.getOutputColumns(); + return Arrays.toString(outputColumns); + } + + public List columnMappingToStringList(VectorColumnMapping vectorColumnMapping) { + final int size = vectorColumnMapping.getCount(); + if (size == 0) { + return null; + } + int[] inputColumns = vectorColumnMapping.getInputColumns(); + int[] outputColumns = vectorColumnMapping.getOutputColumns(); + ArrayList result = new ArrayList(size); + for (int i = 0; i < size; i++) { + result.add(inputColumns[i] + " -> " + outputColumns[i]); + } + return result; + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "className", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getClassName() { + return vectorDesc.getVectorOpClass().getSimpleName(); + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "native", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public boolean getNative() { + return isNative; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java index d7e404c..b8c2d42 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java @@ -19,11 +19,18 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.Arrays; import java.util.EnumSet; +import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; +import java.util.Set; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; +import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc.ReduceSinkKeyType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -121,9 +128,6 @@ private ReducerTraits(int trait) { private static transient Logger LOG = LoggerFactory.getLogger(ReduceSinkDesc.class); - // Extra parameters only for vectorization. 
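columnMappingToStringList in the new OperatorExplainVectorization base class above renders each mapping entry as an "input -> output" pair, and both it and outputColumnsToStringList return null for an empty mapping so EXPLAIN drops the attribute entirely. A standalone restatement of that loop, with invented column numbers for the demonstration:

```java
import java.util.ArrayList;
import java.util.List;

// Restates the columnMappingToStringList loop; column numbers are invented.
public class MappingRenderDemo {
  public static void main(String[] args) {
    int[] inputColumns = {2, 5};
    int[] outputColumns = {0, 1};
    List<String> result = new ArrayList<String>(inputColumns.length);
    for (int i = 0; i < inputColumns.length; i++) {
      result.add(inputColumns[i] + " -> " + outputColumns[i]);
    }
    System.out.println(result);  // prints [2 -> 0, 5 -> 1]
  }
}
```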
- private VectorReduceSinkDesc vectorDesc; - public ReduceSinkDesc() { } @@ -187,14 +191,6 @@ public Object clone() { return desc; } - public void setVectorDesc(VectorReduceSinkDesc vectorDesc) { - this.vectorDesc = vectorDesc; - } - - public VectorReduceSinkDesc getVectorDesc() { - return vectorDesc; - } - public java.util.ArrayList getOutputKeyColumnNames() { return outputKeyColumnNames; } @@ -490,4 +486,105 @@ public void setHasOrderBy(boolean hasOrderBy) { this.hasOrderBy = hasOrderBy; } + // Use LinkedHashSet to give predictable display order. + private static Set vectorizableReduceSinkNativeEngines = + new LinkedHashSet(Arrays.asList("tez", "spark")); + + public class ReduceSinkOperatorExplainVectorization extends OperatorExplainVectorization { + + private final ReduceSinkDesc reduceSinkDesc; + private final VectorReduceSinkDesc vectorReduceSinkDesc; + private final VectorReduceSinkInfo vectorReduceSinkInfo; + + private VectorizationCondition[] nativeConditions; + + public ReduceSinkOperatorExplainVectorization(ReduceSinkDesc reduceSinkDesc, VectorDesc vectorDesc) { + // VectorReduceSinkOperator is not native vectorized. + super(vectorDesc, ((VectorReduceSinkDesc) vectorDesc).reduceSinkKeyType()!= ReduceSinkKeyType.NONE); + this.reduceSinkDesc = reduceSinkDesc; + vectorReduceSinkDesc = (VectorReduceSinkDesc) vectorDesc; + vectorReduceSinkInfo = vectorReduceSinkDesc.getVectorReduceSinkInfo(); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "keyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getKeyExpression() { + if (!isNative) { + return null; + } + return vectorExpressionsToStringList(vectorReduceSinkInfo.getReduceSinkKeyExpressions()); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "valueExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getValueExpression() { + if (!isNative) { + return null; + } + return vectorExpressionsToStringList(vectorReduceSinkInfo.getReduceSinkValueExpressions()); + } + + private VectorizationCondition[] createNativeConditions() { + + boolean enabled = vectorReduceSinkDesc.getIsVectorizationReduceSinkNativeEnabled(); + + String engine = vectorReduceSinkDesc.getEngine(); + String engineInSupportedCondName = + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableReduceSinkNativeEngines; + boolean engineInSupported = vectorizableReduceSinkNativeEngines.contains(engine); + + VectorizationCondition[] conditions = new VectorizationCondition[] { + new VectorizationCondition( + enabled, + HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED.varname), + new VectorizationCondition( + engineInSupported, + engineInSupportedCondName), + new VectorizationCondition( + !vectorReduceSinkDesc.getAcidChange(), + "Not ACID UPDATE or DELETE"), + new VectorizationCondition( + !vectorReduceSinkDesc.getHasBuckets(), + "No buckets"), + new VectorizationCondition( + !vectorReduceSinkDesc.getHasTopN(), + "No TopN"), + new VectorizationCondition( + vectorReduceSinkDesc.getUseUniformHash(), + "Uniform Hash"), + new VectorizationCondition( + !vectorReduceSinkDesc.getHasDistinctColumns(), + "No DISTINCT columns"), + new VectorizationCondition( + vectorReduceSinkDesc.getIsKeyBinarySortable(), + "BinarySortableSerDe for keys"), + new VectorizationCondition( + vectorReduceSinkDesc.getIsValueLazyBinary(), + "LazyBinarySerDe for values") + }; + return conditions; + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = 
"nativeConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsMet() { + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsMet(nativeConditions); + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsNotMet() { + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsNotMet(nativeConditions); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Reduce Sink Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public ReduceSinkOperatorExplainVectorization getReduceSinkVectorization() { + if (vectorDesc == null) { + return null; + } + return new ReduceSinkOperatorExplainVectorization(this, vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java index 72fc4ca..f4ab2a0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java @@ -19,17 +19,23 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorUtils; +import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason; +import org.apache.hadoop.hive.ql.plan.BaseWork.BaseExplainVectorization; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -89,6 +95,9 @@ public ReduceWork(String name) { private ObjectInspector keyObjectInspector = null; private ObjectInspector valueObjectInspector = null; + private boolean reduceVectorizationEnabled; + private String vectorReduceEngine; + /** * If the plan has a reducer and correspondingly a reduce-sink, then store the TableDesc pointing * to keySerializeInfo of the ReduceSink @@ -142,7 +151,8 @@ public void setTagToValueDesc(final List tagToValueDesc) { this.tagToValueDesc = tagToValueDesc; } - @Explain(displayName = "Execution mode", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Execution mode", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public String getExecutionMode() { if (vectorMode) { if (llapMode) { @@ -160,7 +170,8 @@ public String getExecutionMode() { return null; } - @Explain(displayName = "Reduce Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Reduce Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.OPERATOR_PATH) public Operator getReducer() { return reducer; } @@ -252,4 +263,81 @@ public int getMaxReduceTasks() { public void setMaxReduceTasks(int maxReduceTasks) { this.maxReduceTasks = 
maxReduceTasks; } + + public void setReduceVectorizationEnabled(boolean reduceVectorizationEnabled) { + this.reduceVectorizationEnabled = reduceVectorizationEnabled; + } + + public boolean getReduceVectorizationEnabled() { + return reduceVectorizationEnabled; + } + + public void setVectorReduceEngine(String vectorReduceEngine) { + this.vectorReduceEngine = vectorReduceEngine; + } + + public String getVectorReduceEngine() { + return vectorReduceEngine; + } + + // Use LinkedHashSet to give predictable display order. + private static Set reduceVectorizableEngines = + new LinkedHashSet(Arrays.asList("tez", "spark")); + + public class ReduceExplainVectorization extends BaseExplainVectorization { + + private final ReduceWork reduceWork; + + private VectorizationCondition[] reduceVectorizationConditions; + + public ReduceExplainVectorization(ReduceWork reduceWork) { + super(reduceWork); + this.reduceWork = reduceWork; + } + + private VectorizationCondition[] createReduceExplainVectorizationConditions() { + + boolean enabled = reduceWork.getReduceVectorizationEnabled(); + + String engine = reduceWork.getVectorReduceEngine(); + String engineInSupportedCondName = + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + reduceVectorizableEngines; + + boolean engineInSupported = reduceVectorizableEngines.contains(engine); + + VectorizationCondition[] conditions = new VectorizationCondition[] { + new VectorizationCondition( + enabled, + HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED.varname), + new VectorizationCondition( + engineInSupported, + engineInSupportedCondName) + }; + return conditions; + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getEnableConditionsMet() { + if (reduceVectorizationConditions == null) { + reduceVectorizationConditions = createReduceExplainVectorizationConditions(); + } + return VectorizationCondition.getConditionsMet(reduceVectorizationConditions); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getEnableConditionsNotMet() { + if (reduceVectorizationConditions == null) { + reduceVectorizationConditions = createReduceExplainVectorizationConditions(); + } + return VectorizationCondition.getConditionsNotMet(reduceVectorizationConditions); + } + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "Reduce Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public ReduceExplainVectorization getReduceExplainVectorization() { + if (!getVectorizationExamined()) { + return null; + } + return new ReduceExplainVectorization(this); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/SMBJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/SMBJoinDesc.java index 4762408..a363f87 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/SMBJoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/SMBJoinDesc.java @@ -42,6 +42,11 @@ public SMBJoinDesc(MapJoinDesc conf) { } public SMBJoinDesc() { + super(); + } + + public SMBJoinDesc(SMBJoinDesc clone) { + super(clone); } public MapredLocalWork getLocalWork() { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java index 67a8327..0601ce0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java @@ -19,8 
+19,11 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; + import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** @@ -135,4 +138,36 @@ public boolean isSelStarNoCompute() { public void setSelStarNoCompute(boolean selStarNoCompute) { this.selStarNoCompute = selStarNoCompute; } + + + public class SelectOperatorExplainVectorization extends OperatorExplainVectorization { + + private final SelectDesc selectDesc; + private final VectorSelectDesc vectorSelectDesc; + + public SelectOperatorExplainVectorization(SelectDesc selectDesc, VectorDesc vectorDesc) { + // Native vectorization supported. + super(vectorDesc, true); + this.selectDesc = selectDesc; + vectorSelectDesc = (VectorSelectDesc) vectorDesc; + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "selectExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getSelectExpressions() { + return vectorExpressionsToStringList(vectorSelectDesc.getSelectExpressions()); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getProjectedOutputColumns() { + return Arrays.toString(vectorSelectDesc.getProjectedOutputColumns()); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Select Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public SelectOperatorExplainVectorization getSelectVectorization() { + if (vectorDesc == null) { + return null; + } + return new SelectOperatorExplainVectorization(this, vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/SparkHashTableSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/SparkHashTableSinkDesc.java index 8833ae3..260bc07 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/SparkHashTableSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/SparkHashTableSinkDesc.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.plan; +import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; + /** * Map Join operator Descriptor implementation. * @@ -43,4 +46,26 @@ public byte getTag() { public void setTag(byte tag) { this.tag = tag; } + + public class SparkHashTableSinkOperatorExplainVectorization extends OperatorExplainVectorization { + + private final HashTableSinkDesc filterDesc; + private final VectorSparkHashTableSinkDesc vectorHashTableSinkDesc; + + public SparkHashTableSinkOperatorExplainVectorization(HashTableSinkDesc filterDesc, VectorDesc vectorDesc) { + // Native vectorization supported. 
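Each createNativeConditions method in this patch builds a fixed VectorizationCondition array and then splits it into the met/not-met lists that EXPLAIN displays. A small sketch of that split; the (boolean, String) constructor and the static splitters match the usages above, while the exact strings they render are an assumption.

```java
import java.util.List;

// Conceptually lives next to the plan classes; rendered strings are assumed.
public class ConditionSplitDemo {
  public static void main(String[] args) {
    VectorizationCondition[] conditions = new VectorizationCondition[] {
        new VectorizationCondition(true, "Uniform Hash"),
        new VectorizationCondition(false, "No TopN")
    };
    List<String> met = VectorizationCondition.getConditionsMet(conditions);
    List<String> notMet = VectorizationCondition.getConditionsNotMet(conditions);
    System.out.println(met);     // expected to contain "Uniform Hash"
    System.out.println(notMet);  // expected to contain "No TopN"
  }
}
```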
+ super(vectorDesc, true); + this.filterDesc = filterDesc; + vectorHashTableSinkDesc = (VectorSparkHashTableSinkDesc) vectorDesc; + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Spark Hash Table Sink Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public SparkHashTableSinkOperatorExplainVectorization getHashTableSinkVectorization() { + if (vectorDesc == null) { + return null; + } + return new SparkHashTableSinkOperatorExplainVectorization(this, vectorDesc); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java index bb5dd79..066e32d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; - import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -34,6 +33,7 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import com.google.common.base.Preconditions; @@ -43,7 +43,7 @@ * roots and and ReduceWork at all other nodes. */ @SuppressWarnings("serial") -@Explain(displayName = "Spark") +@Explain(displayName = "Spark", vectorization = Vectorization.SUMMARY_PATH) public class SparkWork extends AbstractOperatorDesc { private static int counter; private final String name; @@ -76,7 +76,7 @@ public String getName() { /** * @return a map of "vertex name" to BaseWork */ - @Explain(displayName = "Vertices") + @Explain(displayName = "Vertices", vectorization = Vectorization.SUMMARY_PATH) public Map getWorkMap() { Map result = new LinkedHashMap(); for (BaseWork w: getAllWork()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java index b434cfa..3daa83f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.parse.TableSample; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde.serdeConstants; /** @@ -415,4 +416,29 @@ public boolean isNeedSkipHeaderFooters() { return opProps; } + public class TableScanOperatorExplainVectorization extends OperatorExplainVectorization { + + private final TableScanDesc tableScanDesc; + private final VectorTableScanDesc vectorTableScanDesc; + + public TableScanOperatorExplainVectorization(TableScanDesc tableScanDesc, VectorDesc vectorDesc) { + // Native vectorization supported. 
+ super(vectorDesc, true); + this.tableScanDesc = tableScanDesc; + vectorTableScanDesc = (VectorTableScanDesc) vectorDesc; + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getProjectedOutputColumns() { + return Arrays.toString(vectorTableScanDesc.getProjectedOutputColumns()); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "TableScan Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public TableScanOperatorExplainVectorization getTableScanVectorization() { + if (vectorDesc == null) { + return null; + } + return new TableScanOperatorExplainVectorization(this, vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java index 7a70e6b..a037ea3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java @@ -40,7 +40,7 @@ import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.hive.ql.plan.Explain.Level; - +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** * TezWork. This class encapsulates all the work objects that can be executed @@ -49,7 +49,8 @@ * */ @SuppressWarnings("serial") -@Explain(displayName = "Tez", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) +@Explain(displayName = "Tez", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public class TezWork extends AbstractOperatorDesc { public enum VertexType { @@ -107,7 +108,8 @@ public String getDagId() { /** * getWorkMap returns a map of "vertex name" to BaseWork */ - @Explain(displayName = "Vertices", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Vertices", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public Map getWorkMap() { Map result = new LinkedHashMap(); for (BaseWork w: getAllWork()) { @@ -306,7 +308,8 @@ public int compareTo(Dependency o) { } } - @Explain(displayName = "Edges", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Edges", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public Map> getDependencyMap() { Map> result = new LinkedHashMap>(); for (Map.Entry> entry: invertedWorkGraph.entrySet()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java new file mode 100644 index 0000000..2e11321 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * VectorAppMasterEventDesc. + * + * Extra parameters beyond AppMasterEventDesc just for the VectorAppMasterEventDescOperator. + * + * We don't extend AppMasterEventDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot work for little gain. + */ +public class VectorAppMasterEventDesc extends AbstractVectorDesc { + + private static long serialVersionUID = 1L; + + public VectorAppMasterEventDesc() { + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorDesc.java index 3a2efdb..66e3275 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorDesc.java @@ -21,5 +21,11 @@ import java.io.Serializable; public interface VectorDesc extends Serializable, Cloneable { - public Object clone() throws CloneNotSupportedException; + + public Object clone(); + + public void setVectorOp(Class vectorOpClass); + + public Class getVectorOpClass(); + } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorFileSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorFileSinkDesc.java new file mode 100644 index 0000000..325ac91 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorFileSinkDesc.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * VectorFileSinkDesc. + * + * Extra parameters beyond FileSinkDesc just for the VectorFileSinkOperator. + * + * We don't extend FileSinkDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot work for little gain. + */ +public class VectorFileSinkDesc extends AbstractVectorDesc { + + private static long serialVersionUID = 1L; + + public VectorFileSinkDesc() { + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorFilterDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorFilterDesc.java new file mode 100644 index 0000000..6feed84 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorFilterDesc.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; + +/** + * VectorFilterDesc. + * + * Extra parameters beyond FilterDesc just for the VectorFilterOperator. + * + * We don't extend FilterDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot work for little gain. + */ +public class VectorFilterDesc extends AbstractVectorDesc { + + private static long serialVersionUID = 1L; + + private VectorExpression predicateExpression; + + public VectorFilterDesc() { + } + + public void setPredicateExpression(VectorExpression predicateExpression) { + this.predicateExpression = predicateExpression; + } + + public VectorExpression getPredicateExpression() { + return predicateExpression; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java index 08f8ebf..f8554e2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.plan; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; + /** * VectorGroupByDesc. * @@ -59,6 +62,10 @@ private boolean isVectorOutput; + private VectorExpression[] keyExpressions; + private VectorAggregateExpression[] aggregators; + private int[] projectedOutputColumns; + public VectorGroupByDesc() { this.processingMode = ProcessingMode.NONE; this.isVectorOutput = false; @@ -79,6 +86,30 @@ public void setVectorOutput(boolean isVectorOutput) { this.isVectorOutput = isVectorOutput; } + public void setKeyExpressions(VectorExpression[] keyExpressions) { + this.keyExpressions = keyExpressions; + } + + public VectorExpression[] getKeyExpressions() { + return keyExpressions; + } + + public void setAggregators(VectorAggregateExpression[] aggregators) { + this.aggregators = aggregators; + } + + public VectorAggregateExpression[] getAggregators() { + return aggregators; + } + + public void setProjectedOutputColumns(int[] projectedOutputColumns) { + this.projectedOutputColumns = projectedOutputColumns; + } + + public int[] getProjectedOutputColumns() { + return projectedOutputColumns; + } + /** * Which ProcessingMode for VectorGroupByOperator? * diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorLimitDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorLimitDesc.java new file mode 100644 index 0000000..c9bc45a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorLimitDesc.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * VectorLimitDesc. + * + * Extra parameters beyond LimitDesc just for the VectorLimitOperator. + * + * We don't extend LimitDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot work for little gain. + */ +public class VectorLimitDesc extends AbstractVectorDesc { + + private static long serialVersionUID = 1L; + + public VectorLimitDesc() { + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java index 8ea230f..3aa65d3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java @@ -18,9 +18,13 @@ package org.apache.hadoop.hive.ql.plan; +import java.util.List; + import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import com.google.common.base.Preconditions; + /** * VectorGroupByDesc. * @@ -79,23 +83,43 @@ public PrimitiveTypeInfo getPrimitiveTypeInfo() { } } + public static enum OperatorVariation { + NONE, + INNER_BIG_ONLY, + INNER, + LEFT_SEMI, + OUTER + } + private HashTableImplementationType hashTableImplementationType; private HashTableKind hashTableKind; private HashTableKeyType hashTableKeyType; + private OperatorVariation operatorVariation; private boolean minMaxEnabled; + private VectorMapJoinInfo vectorMapJoinInfo; + public VectorMapJoinDesc() { hashTableImplementationType = HashTableImplementationType.NONE; hashTableKind = HashTableKind.NONE; hashTableKeyType = HashTableKeyType.NONE; + operatorVariation = OperatorVariation.NONE; minMaxEnabled = false; + vectorMapJoinInfo = null; } - public VectorMapJoinDesc(VectorMapJoinDesc clone) { - this.hashTableImplementationType = clone.hashTableImplementationType; - this.hashTableKind = clone.hashTableKind; - this.hashTableKeyType = clone.hashTableKeyType; - this.minMaxEnabled = clone.minMaxEnabled; + @Override + public VectorMapJoinDesc clone() { + VectorMapJoinDesc clone = new VectorMapJoinDesc(); + clone.hashTableImplementationType = this.hashTableImplementationType; + clone.hashTableKind = this.hashTableKind; + clone.hashTableKeyType = this.hashTableKeyType; + clone.operatorVariation = this.operatorVariation; + clone.minMaxEnabled = this.minMaxEnabled; + if (vectorMapJoinInfo != null) { + throw new RuntimeException("Cloning VectorMapJoinInfo not supported"); + } + return clone; } public HashTableImplementationType hashTableImplementationType() { @@ -122,6 +146,14 @@ public void setHashTableKeyType(HashTableKeyType hashTableKeyType) { this.hashTableKeyType = hashTableKeyType; } + public OperatorVariation operatorVariation() { + return operatorVariation; + } + + public void setOperatorVariation(OperatorVariation operatorVariation) { + this.operatorVariation = operatorVariation; + } + public boolean 
minMaxEnabled() { return minMaxEnabled; } @@ -129,4 +161,87 @@ public boolean minMaxEnabled() { public void setMinMaxEnabled(boolean minMaxEnabled) { this.minMaxEnabled = minMaxEnabled; } + + public void setVectorMapJoinInfo(VectorMapJoinInfo vectorMapJoinInfo) { + Preconditions.checkState(vectorMapJoinInfo != null); + this.vectorMapJoinInfo = vectorMapJoinInfo; + } + + public VectorMapJoinInfo getVectorMapJoinInfo() { + return vectorMapJoinInfo; + } + + private boolean useOptimizedTable; + private boolean isVectorizationMapJoinNativeEnabled; + private String engine; + private boolean oneMapJoinCondition; + private boolean hasNullSafes; + private boolean isFastHashTableEnabled; + private boolean isHybridHashJoin; + private boolean supportsKeyTypes; + private List notSupportedKeyTypes; + private boolean smallTableExprVectorizes; + + public void setUseOptimizedTable(boolean useOptimizedTable) { + this.useOptimizedTable = useOptimizedTable; + } + public boolean getUseOptimizedTable() { + return useOptimizedTable; + } + public void setIsVectorizationMapJoinNativeEnabled(boolean isVectorizationMapJoinNativeEnabled) { + this.isVectorizationMapJoinNativeEnabled = isVectorizationMapJoinNativeEnabled; + } + public boolean getIsVectorizationMapJoinNativeEnabled() { + return isVectorizationMapJoinNativeEnabled; + } + public void setEngine(String engine) { + this.engine = engine; + } + public String getEngine() { + return engine; + } + public void setOneMapJoinCondition(boolean oneMapJoinCondition) { + this.oneMapJoinCondition = oneMapJoinCondition; + } + public boolean getOneMapJoinCondition() { + return oneMapJoinCondition; + } + public void setHasNullSafes(boolean hasNullSafes) { + this.hasNullSafes = hasNullSafes; + } + public boolean getHasNullSafes() { + return hasNullSafes; + } + public void setSupportsKeyTypes(boolean supportsKeyTypes) { + this.supportsKeyTypes = supportsKeyTypes; + } + public boolean getSupportsKeyTypes() { + return supportsKeyTypes; + } + public void setNotSupportedKeyTypes(List notSupportedKeyTypes) { + this.notSupportedKeyTypes = notSupportedKeyTypes; + } + public List getNotSupportedKeyTypes() { + return notSupportedKeyTypes; + } + public void setSmallTableExprVectorizes(boolean smallTableExprVectorizes) { + this.smallTableExprVectorizes = smallTableExprVectorizes; + } + public boolean getSmallTableExprVectorizes() { + return smallTableExprVectorizes; + } + + public void setIsFastHashTableEnabled(boolean isFastHashTableEnabled) { + this.isFastHashTableEnabled = isFastHashTableEnabled; + } + public boolean getIsFastHashTableEnabled() { + return isFastHashTableEnabled; + } + public void setIsHybridHashJoin(boolean isHybridHashJoin) { + this.isHybridHashJoin = isHybridHashJoin; + } + public boolean getIsHybridHashJoin() { + return isHybridHashJoin; + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java new file mode 100644 index 0000000..9429785 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java @@ -0,0 +1,171 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.Arrays; + +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * VectorMapJoinInfo. + * + * A convenience data structure that has information needed to vectorize map join. + * + * It is created by the Vectorizer while it is determining whether it can specialize the map join, + * so the information doesn't have to be recreated again and again by the VectorMapJoinOperator + * constructors and later during execution. + */ +public class VectorMapJoinInfo { + + private static final long serialVersionUID = 1L; + + private int[] bigTableKeyColumnMap; + private String[] bigTableKeyColumnNames; + private TypeInfo[] bigTableKeyTypeInfos; + private VectorExpression[] bigTableKeyExpressions; + + private int[] bigTableValueColumnMap; + private String[] bigTableValueColumnNames; + private TypeInfo[] bigTableValueTypeInfos; + private VectorExpression[] bigTableValueExpressions; + + private VectorColumnOutputMapping bigTableRetainedMapping; + private VectorColumnOutputMapping bigTableOuterKeyMapping; + private VectorColumnSourceMapping smallTableMapping; + + private VectorColumnSourceMapping projectionMapping; + + public VectorMapJoinInfo() { + bigTableKeyColumnMap = null; + bigTableKeyColumnNames = null; + bigTableKeyTypeInfos = null; + bigTableKeyExpressions = null; + + bigTableValueColumnMap = null; + bigTableValueColumnNames = null; + bigTableValueTypeInfos = null; + bigTableValueExpressions = null; + + bigTableRetainedMapping = null; + bigTableOuterKeyMapping = null; + smallTableMapping = null; + + projectionMapping = null; + } + + public int[] getBigTableKeyColumnMap() { + return bigTableKeyColumnMap; + } + + public void setBigTableKeyColumnMap(int[] bigTableKeyColumnMap) { + this.bigTableKeyColumnMap = bigTableKeyColumnMap; + } + + public String[] getBigTableKeyColumnNames() { + return bigTableKeyColumnNames; + } + + public void setBigTableKeyColumnNames(String[] bigTableKeyColumnNames) { + this.bigTableKeyColumnNames = bigTableKeyColumnNames; + } + + public TypeInfo[] getBigTableKeyTypeInfos() { + return bigTableKeyTypeInfos; + } + + public void setBigTableKeyTypeInfos(TypeInfo[] bigTableKeyTypeInfos) { + this.bigTableKeyTypeInfos = bigTableKeyTypeInfos; + } + + public VectorExpression[] getBigTableKeyExpressions() { + return bigTableKeyExpressions; + } + + public void setBigTableKeyExpressions(VectorExpression[] bigTableKeyExpressions) { + this.bigTableKeyExpressions = bigTableKeyExpressions; + } + + + public int[] getBigTableValueColumnMap() { + return bigTableValueColumnMap; + } + + public void setBigTableValueColumnMap(int[] bigTableValueColumnMap) { + this.bigTableValueColumnMap = bigTableValueColumnMap; + } + + public String[] getBigTableValueColumnNames() { + return bigTableValueColumnNames; + } +
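+  // A hedged population sketch (array values invented): per the class comment,
+  // the Vectorizer fills this in once at plan time and hands it to the map join
+  // descriptor via VectorMapJoinDesc.setVectorMapJoinInfo(...):
+  //
+  //   VectorMapJoinInfo info = new VectorMapJoinInfo();
+  //   info.setBigTableKeyColumnMap(new int[] {0});
+  //   info.setBigTableValueColumnMap(new int[] {1});
+  //   vectorMapJoinDesc.setVectorMapJoinInfo(info);
+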
+ public void setBigTableValueColumnNames(String[] bigTableValueColumnNames) { + this.bigTableValueColumnNames = bigTableValueColumnNames; + } + + public TypeInfo[] getBigTableValueTypeInfos() { + return bigTableValueTypeInfos; + } + + public void setBigTableValueTypeInfos(TypeInfo[] bigTableValueTypeInfos) { + this.bigTableValueTypeInfos = bigTableValueTypeInfos; + } + + public VectorExpression[] getBigTableValueExpressions() { + return bigTableValueExpressions; + } + + public void setBigTableValueExpressions(VectorExpression[] bigTableValueExpressions) { + this.bigTableValueExpressions = bigTableValueExpressions; + } + + public void setBigTableRetainedMapping(VectorColumnOutputMapping bigTableRetainedMapping) { + this.bigTableRetainedMapping = bigTableRetainedMapping; + } + + public VectorColumnOutputMapping getBigTableRetainedMapping() { + return bigTableRetainedMapping; + } + + public void setBigTableOuterKeyMapping(VectorColumnOutputMapping bigTableOuterKeyMapping) { + this.bigTableOuterKeyMapping = bigTableOuterKeyMapping; + } + + public VectorColumnOutputMapping getBigTableOuterKeyMapping() { + return bigTableOuterKeyMapping; + } + + public void setSmallTableMapping(VectorColumnSourceMapping smallTableMapping) { + this.smallTableMapping = smallTableMapping; + } + + public VectorColumnSourceMapping getSmallTableMapping() { + return smallTableMapping; + } + + public void setProjectionMapping(VectorColumnSourceMapping projectionMapping) { + this.projectionMapping = projectionMapping; + } + + public VectorColumnSourceMapping getProjectionMapping() { + return projectionMapping; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorReduceSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorReduceSinkDesc.java index c56bff6..288a440 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorReduceSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorReduceSinkDesc.java @@ -61,4 +61,72 @@ public void setVectorReduceSinkInfo(VectorReduceSinkInfo vectorReduceSinkInfo) { public VectorReduceSinkInfo getVectorReduceSinkInfo() { return vectorReduceSinkInfo; } + + private boolean isVectorizationReduceSinkNativeEnabled; + private String engine; + private boolean acidChange; + private boolean hasBuckets; + private boolean hasTopN; + private boolean useUniformHash; + private boolean hasDistinctColumns; + private boolean isKeyBinarySortable; + private boolean isValueLazyBinary; + + /* + * The following conditions are for native Vector ReduceSink.
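+ *
+ * A hedged illustration (flag values invented): while validating a ReduceSink
+ * operator, the Vectorizer can record, e.g.,
+ *
+ *   vectorDesc.setIsVectorizationReduceSinkNativeEnabled(true);
+ *   vectorDesc.setHasTopN(false);
+ *
+ * so the conditions that held (or failed) can be reported later.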
+ */ + public void setIsVectorizationReduceSinkNativeEnabled(boolean isVectorizationReduceSinkNativeEnabled) { + this.isVectorizationReduceSinkNativeEnabled = isVectorizationReduceSinkNativeEnabled; + } + public boolean getIsVectorizationReduceSinkNativeEnabled() { + return isVectorizationReduceSinkNativeEnabled; + } + public void setEngine(String engine) { + this.engine = engine; + } + public String getEngine() { + return engine; + } + public void setAcidChange(boolean acidChange) { + this.acidChange = acidChange; + } + public boolean getAcidChange() { + return acidChange; + } + public void setHasBuckets(boolean hasBuckets) { + this.hasBuckets = hasBuckets; + } + public boolean getHasBuckets() { + return hasBuckets; + } + public void setHasTopN(boolean hasTopN) { + this.hasTopN = hasTopN; + } + public boolean getHasTopN() { + return hasTopN; + } + public void setUseUniformHash(boolean useUniformHash) { + this.useUniformHash = useUniformHash; + } + public boolean getUseUniformHash() { + return useUniformHash; + } + public void setHasDistinctColumns(boolean hasDistinctColumns) { + this.hasDistinctColumns = hasDistinctColumns; + } + public boolean getHasDistinctColumns() { + return hasDistinctColumns; + } + public void setIsKeyBinarySortable(boolean isKeyBinarySortable) { + this.isKeyBinarySortable = isKeyBinarySortable; + } + public boolean getIsKeyBinarySortable() { + return isKeyBinarySortable; + } + public void setIsValueLazyBinary(boolean isValueLazyBinary) { + this.isValueLazyBinary = isValueLazyBinary; + } + public boolean getIsValueLazyBinary() { + return isValueLazyBinary; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSMBJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSMBJoinDesc.java new file mode 100644 index 0000000..031f11e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSMBJoinDesc.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * VectorSMBJoinDesc. + * + * Extra parameters beyond SMBMapJoinDesc just for the VectorSMBMapJoinOperator. + * + * We don't extend SMBMapJoinDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot of work for little gain.
+ */ +public class VectorSMBJoinDesc extends AbstractVectorDesc { + + private static final long serialVersionUID = 1L; + + public VectorSMBJoinDesc() { + } + + @Override + public VectorSMBJoinDesc clone() { + VectorSMBJoinDesc clone = new VectorSMBJoinDesc(); + return clone; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSelectDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSelectDesc.java new file mode 100644 index 0000000..c2c9450 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSelectDesc.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; + +/** + * VectorSelectDesc. + * + * Extra parameters beyond SelectDesc just for the VectorSelectOperator. + * + * We don't extend SelectDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot of work for little gain. + */ +public class VectorSelectDesc extends AbstractVectorDesc { + + private static final long serialVersionUID = 1L; + + private VectorExpression[] selectExpressions; + private int[] projectedOutputColumns; + + public VectorSelectDesc() { + } + + public void setSelectExpressions(VectorExpression[] selectExpressions) { + this.selectExpressions = selectExpressions; + } + + public VectorExpression[] getSelectExpressions() { + return selectExpressions; + } + + public void setProjectedOutputColumns(int[] projectedOutputColumns) { + this.projectedOutputColumns = projectedOutputColumns; + } + + public int[] getProjectedOutputColumns() { + return projectedOutputColumns; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSparkHashTableSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSparkHashTableSinkDesc.java new file mode 100644 index 0000000..7fb59db --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSparkHashTableSinkDesc.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * VectorSparkHashTableSinkDesc. + * + * Extra parameters beyond SparkHashTableSinkDesc just for the VectorSparkHashTableSinkOperator. + * + * We don't extend SparkHashTableSinkDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot of work for little gain. + */ +public class VectorSparkHashTableSinkDesc extends AbstractVectorDesc { + + private static final long serialVersionUID = 1L; + + public VectorSparkHashTableSinkDesc() { + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSparkPartitionPruningSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSparkPartitionPruningSinkDesc.java new file mode 100644 index 0000000..c0bc7e4 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSparkPartitionPruningSinkDesc.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * VectorSparkPartitionPruningSinkDesc. + * + * Extra parameters beyond SparkPartitionPruningSinkDesc just for the + * VectorSparkPartitionPruningSinkOperator. + * + * We don't extend SparkPartitionPruningSinkDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot of work for little gain. + */ +public class VectorSparkPartitionPruningSinkDesc extends AbstractVectorDesc { + + private static final long serialVersionUID = 1L; + + public VectorSparkPartitionPruningSinkDesc() { + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorTableScanDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorTableScanDesc.java new file mode 100644 index 0000000..6e5ebe4 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorTableScanDesc.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * VectorTableScanDesc. + * + * Extra parameters beyond TableScanDesc just for the VectorTableScanOperator.
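+ *
+ * A hedged attachment sketch (column indices invented; the setVectorDesc(...)
+ * wiring mirrors what this patch's tests do for GroupByDesc and SelectDesc):
+ *
+ *   VectorTableScanDesc vectorDesc = new VectorTableScanDesc();
+ *   vectorDesc.setProjectedOutputColumns(new int[] {0, 1, 2});
+ *   tableScanDesc.setVectorDesc(vectorDesc);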
+ * + * We don't extend TableScanDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot of work for little gain. + */ +public class VectorTableScanDesc extends AbstractVectorDesc { + + private static final long serialVersionUID = 1L; + + private int[] projectedOutputColumns; + + public VectorTableScanDesc() { + } + + public void setProjectedOutputColumns(int[] projectedOutputColumns) { + this.projectedOutputColumns = projectedOutputColumns; + } + + public int[] getProjectedOutputColumns() { + return projectedOutputColumns; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorizationCondition.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorizationCondition.java new file mode 100644 index 0000000..32b62e8 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorizationCondition.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class VectorizationCondition { + + private final boolean flag; + private final String conditionName; + + public VectorizationCondition(boolean flag, String conditionName) { + this.flag = flag; + this.conditionName = conditionName; + } + + public boolean getFlag() { + return flag; + } + + public String getConditionName() { + return conditionName; + } + + public static List<String> getConditionsMet(VectorizationCondition[] conditions) { + List<String> metList = new ArrayList<String>(); + for (VectorizationCondition condition : conditions) { + if (condition.getFlag()) { + metList.add(condition.getConditionName() + " IS true"); + } + } + return metList; + } + + public static List<String> getConditionsNotMet(VectorizationCondition[] conditions) { + List<String> notMetList = new ArrayList<String>(); + for (VectorizationCondition condition : conditions) { + if (!condition.getFlag()) { + notMetList.add(condition.getConditionName() + " IS false"); + } + } + return notMetList; + } + + public static List<String> addBooleans(List<String> conditions, boolean flag) { + ArrayList<String> result = new ArrayList<String>(conditions.size()); + for (String condition : conditions) { + result.add(condition + " IS " + flag); + } + return result; + } + +
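+  // A hedged usage sketch (condition names are examples only):
+  //
+  //   VectorizationCondition[] conds = new VectorizationCondition[] {
+  //       new VectorizationCondition(true, "hive.vectorized.execution.enabled"),
+  //       new VectorizationCondition(false, "One MapJoin Condition") };
+  //   getConditionsMet(conds);     // ["hive.vectorized.execution.enabled IS true"]
+  //   getConditionsNotMet(conds);  // ["One MapJoin Condition IS false"]
+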
+ // Helper method. + public static List<String> getConditionsSupported(boolean isSupported) { + return Arrays.asList("Supported IS " + isSupported); + } + +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java index d3bb84d..22b845d 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java @@ -25,13 +25,19 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColEqualDoubleScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongColumn; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorFilterDesc; import org.junit.Test; /** @@ -89,10 +95,15 @@ private VectorFilterOperator getAVectorFilterOperator() throws HiveException { ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Long.class, "col1", "table", false); List<String> columns = new ArrayList<String>(); columns.add("col1"); - VectorizationContext vc = new VectorizationContext("name", columns); FilterDesc fdesc = new FilterDesc(); fdesc.setPredicate(col1Expr); - return new VectorFilterOperator(new CompilationOpContext(), vc, fdesc); + + Operator<? extends OperatorDesc> filterOp = + OperatorFactory.get(new CompilationOpContext(), fdesc); + + VectorizationContext vc = new VectorizationContext("name", columns); + + return (VectorFilterOperator) Vectorizer.vectorizeFilterOperator(filterOp, vc); } @Test diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java index 0ddebf8..0bc690f 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java @@ -39,16 +39,20 @@ import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureOutputOperator; import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromConcat; import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromLongIterables; import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables; import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromRepeats; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import
org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; @@ -128,9 +132,11 @@ private static GroupByDesc buildGroupByDescType( outputColumnNames.add("_col0"); GroupByDesc desc = new GroupByDesc(); + desc.setVectorDesc(new VectorGroupByDesc()); + desc.setOutputColumnNames(outputColumnNames); desc.setAggregators(aggs); - desc.getVectorDesc().setProcessingMode(ProcessingMode.GLOBAL); + ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(ProcessingMode.GLOBAL); return desc; } @@ -146,6 +152,8 @@ private static GroupByDesc buildGroupByDescCountStar( outputColumnNames.add("_col0"); GroupByDesc desc = new GroupByDesc(); + desc.setVectorDesc(new VectorGroupByDesc()); + desc.setOutputColumnNames(outputColumnNames); desc.setAggregators(aggs); @@ -162,7 +170,7 @@ private static GroupByDesc buildKeyGroupByDesc( TypeInfo keyTypeInfo) { GroupByDesc desc = buildGroupByDescType(ctx, aggregate, GenericUDAFEvaluator.Mode.PARTIAL1, column, dataTypeInfo); - desc.getVectorDesc().setProcessingMode(ProcessingMode.HASH); + ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(ProcessingMode.HASH); ExprNodeDesc keyExp = buildColumnDesc(ctx, key, keyTypeInfo); ArrayList<ExprNodeDesc> keys = new ArrayList<ExprNodeDesc>(); @@ -196,7 +204,11 @@ public void testMemoryPressureFlush() throws HiveException { desc.setMemoryThreshold(treshold); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -1735,13 +1747,19 @@ private void testMultiKey( } GroupByDesc desc = new GroupByDesc(); + desc.setVectorDesc(new VectorGroupByDesc()); + desc.setOutputColumnNames(outputColumnNames); desc.setAggregators(aggs); desc.setKeys(keysDesc); - desc.getVectorDesc().setProcessingMode(ProcessingMode.HASH); + ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(ProcessingMode.HASH); CompilationOpContext cCtx = new CompilationOpContext(); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -1846,9 +1864,11 @@ private void testKeyTypeAggregate( outputColumnNames.add("_col1"); GroupByDesc desc = new GroupByDesc(); + desc.setVectorDesc(new VectorGroupByDesc()); + desc.setOutputColumnNames(outputColumnNames); desc.setAggregators(aggs); - desc.getVectorDesc().setProcessingMode(ProcessingMode.HASH); + ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(ProcessingMode.HASH); ExprNodeDesc keyExp = buildColumnDesc(ctx, "Key", TypeInfoFactory.getPrimitiveTypeInfo(data.getTypes()[0])); @@ -1857,7 +1877,11 @@ private void testKeyTypeAggregate( desc.setKeys(keysDesc); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); +
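+    // As elsewhere in this patch's tests: build the plan-level operator through
+    // OperatorFactory and let the Vectorizer produce the vectorized operator,
+    // rather than constructing VectorGroupByOperator directly.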
+ Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -2252,10 +2276,14 @@ public void testAggregateCountStarIterable ( VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = buildGroupByDescCountStar (ctx); - desc.getVectorDesc().setProcessingMode(ProcessingMode.HASH); + ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(ProcessingMode.HASH); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -2283,10 +2311,14 @@ public void testAggregateCountReduceIterable ( VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = buildGroupByDescType(ctx, "count", GenericUDAFEvaluator.Mode.FINAL, "A", TypeInfoFactory.longTypeInfo); - VectorGroupByDesc vectorDesc = desc.getVectorDesc(); + VectorGroupByDesc vectorDesc = (VectorGroupByDesc) desc.getVectorDesc(); vectorDesc.setProcessingMode(ProcessingMode.GLOBAL); // Use GLOBAL when no key for Reduce. CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -2318,7 +2350,11 @@ public void testAggregateStringIterable ( TypeInfoFactory.stringTypeInfo); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -2350,7 +2386,11 @@ public void testAggregateDecimalIterable ( buildGroupByDescType(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.getDecimalTypeInfo(30, 4)); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -2383,7 +2423,11 @@ public void testAggregateDoubleIterable ( TypeInfoFactory.doubleTypeInfo); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo);
vgo.initialize(hconf, null); @@ -2414,7 +2458,11 @@ public void testAggregateLongIterable ( GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.longTypeInfo); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(null, null); @@ -2449,7 +2497,11 @@ public void testAggregateLongKeyIterable ( TypeInfoFactory.longTypeInfo, "Key", TypeInfoFactory.longTypeInfo); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -2515,7 +2567,11 @@ public void testAggregateStringKeyIterable ( dataTypeInfo, "Key", TypeInfoFactory.stringTypeInfo); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java index 779177a..614b1d1 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java @@ -26,6 +26,7 @@ import java.util.Map; import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; @@ -33,6 +34,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.apache.hadoop.hive.ql.plan.VectorSelectDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -51,6 +53,7 @@ public ValidatorVectorSelectOperator(CompilationOpContext ctx, VectorizationContext ctxt, OperatorDesc conf) throws HiveException { super(ctx, ctxt, conf); + initializeOp(null); } @@ -115,6 +118,19 @@ public void testSelectOperator() throws HiveException { outputColNames.add("_col1"); selDesc.setOutputColumnNames(outputColNames); + // CONSIDER unwinding ValidatorVectorSelectOperator as a subclass of VectorSelectOperator.
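+    // The test wires the VectorSelectDesc by hand: one VectorExpression per select
+    // column, plus the projected output column numbers (3 and 2 below), instead of
+    // obtaining them from the Vectorizer.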
+ VectorSelectDesc vectorSelectDesc = new VectorSelectDesc(); + selDesc.setVectorDesc(vectorSelectDesc); + List<ExprNodeDesc> selectColList = selDesc.getColList(); + VectorExpression[] vectorSelectExprs = new VectorExpression[selectColList.size()]; + for (int i = 0; i < selectColList.size(); i++) { + ExprNodeDesc expr = selectColList.get(i); + VectorExpression ve = vc.getVectorExpression(expr); + vectorSelectExprs[i] = ve; + } + vectorSelectDesc.setSelectExpressions(vectorSelectExprs); + vectorSelectDesc.setProjectedOutputColumns(new int[] {3, 2}); + ValidatorVectorSelectOperator vso = new ValidatorVectorSelectOperator( new CompilationOpContext(), vc, selDesc); diff --git ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java index 889f00a..ffd8546 100644 --- ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java +++ ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java @@ -101,6 +101,8 @@ public void testAggregateOnUDF() throws HiveException { outputColumnNames.add("_col0"); GroupByDesc desc = new GroupByDesc(); + desc.setVectorDesc(new VectorGroupByDesc()); + desc.setOutputColumnNames(outputColumnNames); ArrayList<AggregationDesc> aggDescList = new ArrayList<AggregationDesc>(); aggDescList.add(aggDesc); @@ -111,13 +113,14 @@ public void testAggregateOnUDF() throws HiveException { grpByKeys.add(colExprB); desc.setKeys(grpByKeys); - GroupByOperator gbyOp = new GroupByOperator(new CompilationOpContext()); - gbyOp.setConf(desc); + Operator<GroupByDesc> gbyOp = OperatorFactory.get(new CompilationOpContext(), desc); + desc.setMode(GroupByDesc.Mode.HASH); Vectorizer v = new Vectorizer(); + v.testSetCurrentBaseWork(new MapWork()); Assert.assertTrue(v.validateMapWorkOperator(gbyOp, null, false)); - VectorGroupByOperator vectorOp = (VectorGroupByOperator) v.vectorizeOperator(gbyOp, vContext, false); + VectorGroupByOperator vectorOp = (VectorGroupByOperator) v.vectorizeOperator(gbyOp, vContext, false, null); Assert.assertEquals(VectorUDAFSumLong.class, vectorOp.getAggregators()[0].getClass()); VectorUDAFSumLong udaf = (VectorUDAFSumLong) vectorOp.getAggregators()[0]; Assert.assertEquals(FuncAbsLongToLong.class, udaf.getInputExpression().getClass()); @@ -152,8 +155,9 @@ public void testValidateNestedExpressions() { andExprDesc.setChildren(children3); Vectorizer v = new Vectorizer(); - Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, VectorExpressionDescriptor.Mode.FILTER)); - Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, VectorExpressionDescriptor.Mode.PROJECTION)); + v.testSetCurrentBaseWork(new MapWork()); + Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, "test", VectorExpressionDescriptor.Mode.FILTER)); + Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, "test", VectorExpressionDescriptor.Mode.PROJECTION)); } /** @@ -201,6 +205,7 @@ public void testValidateMapJoinOperator() { map.setConf(mjdesc); Vectorizer vectorizer = new Vectorizer(); + vectorizer.testSetCurrentBaseWork(new MapWork()); Assert.assertTrue(vectorizer.validateMapWorkOperator(map, null, false)); } @@ -217,6 +222,7 @@ public void testValidateSMBJoinOperator() { map.setConf(mjdesc); Vectorizer vectorizer = new Vectorizer(); + vectorizer.testSetCurrentBaseWork(new MapWork()); Assert.assertTrue(vectorizer.validateMapWorkOperator(map, null, false)); } @@ -224,8 +230,8 @@ public void testValidateSMBJoinOperator() { public void testExprNodeDynamicValue() { ExprNodeDesc exprNode = new ExprNodeDynamicValueDesc(new
DynamicValue("id1", TypeInfoFactory.stringTypeInfo)); Vectorizer v = new Vectorizer(); - Assert.assertTrue(v.validateExprNodeDesc(exprNode, Mode.FILTER)); - Assert.assertTrue(v.validateExprNodeDesc(exprNode, Mode.PROJECTION)); + Assert.assertTrue(v.validateExprNodeDesc(exprNode, "Test", Mode.FILTER)); + Assert.assertTrue(v.validateExprNodeDesc(exprNode, "Test", Mode.PROJECTION)); } @Test @@ -247,6 +253,6 @@ public void testExprNodeBetweenWithDynamicValue() { betweenExpr.setChildren(children1); Vectorizer v = new Vectorizer(); - Assert.assertTrue(v.validateExprNodeDesc(betweenExpr, Mode.FILTER)); + Assert.assertTrue(v.validateExprNodeDesc(betweenExpr, "Test", Mode.FILTER)); } } diff --git ql/src/test/queries/clientpositive/llap_text.q ql/src/test/queries/clientpositive/llap_text.q index 2c93897..0441336 100644 --- ql/src/test/queries/clientpositive/llap_text.q +++ ql/src/test/queries/clientpositive/llap_text.q @@ -6,6 +6,7 @@ set hive.fetch.task.conversion=none; SET hive.llap.io.enabled=false; set hive.llap.cache.allow.synthetic.fileid=true; +-- SORT_QUERY_RESULTS DROP TABLE text_llap; diff --git ql/src/test/queries/clientpositive/schema_evol_orc_vec_part.q ql/src/test/queries/clientpositive/schema_evol_orc_vec_part.q index 7e66cbc..48903d2 100644 --- ql/src/test/queries/clientpositive/schema_evol_orc_vec_part.q +++ ql/src/test/queries/clientpositive/schema_evol_orc_vec_part.q @@ -1,4 +1,4 @@ -set hive.explain.user=true; +set hive.explain.user=false; set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; @@ -38,7 +38,7 @@ alter table part_add_int_permute_select add columns(c int); insert into table part_add_int_permute_select partition(part=1) VALUES (2, 2222, 'new', 3333); -explain +explain vectorization detail select insert_num,part,a,b from part_add_int_permute_select; -- SELECT permutation columns to make sure NULL defaulting works right @@ -61,7 +61,7 @@ alter table part_add_int_string_permute_select add columns(c int, d string); insert into table part_add_int_string_permute_select partition(part=1) VALUES (2, 2222, 'new', 3333, '4444'); -explain +explain vectorization detail select insert_num,part,a,b from part_add_int_string_permute_select; -- SELECT permutation columns to make sure NULL defaulting works right @@ -93,7 +93,7 @@ alter table part_change_string_group_double replace columns (insert_num int, c1 insert into table part_change_string_group_double partition(part=1) SELECT insert_num, double1, double1, double1, 'new' FROM schema_evolution_data WHERE insert_num = 111; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_string_group_double; select insert_num,part,c1,c2,c3,b from part_change_string_group_double; @@ -116,7 +116,7 @@ alter table part_change_date_group_string_group_date_timestamp replace columns(i insert into table part_change_date_group_string_group_date_timestamp partition(part=1) VALUES (111, 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp; @@ -164,7 +164,7 @@ insert into table part_change_numeric_group_string_group_multi_ints_string_group 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization 
detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group; @@ -207,7 +207,7 @@ insert into table part_change_numeric_group_string_group_floating_string_group p 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group; @@ -249,7 +249,7 @@ insert into table part_change_string_group_string_group_string partition(part=1) 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string; @@ -299,7 +299,7 @@ insert into table part_change_lower_to_higher_numeric_group_tinyint_to_bigint pa 1234.5678, 9876.543, 789.321, 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint; @@ -330,7 +330,7 @@ alter table part_change_lower_to_higher_numeric_group_decimal_to_float replace c insert into table part_change_lower_to_higher_numeric_group_decimal_to_float partition(part=1) VALUES (111, 1234.5678, 9876.543, 1234.5678, 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float; select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float; diff --git ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_complex.q ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_complex.q index ac747e6..45afd9d 100644 --- ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_complex.q +++ ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_complex.q @@ -1,4 +1,4 @@ -set hive.explain.user=true; +set hive.explain.user=false; set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; @@ -51,7 +51,7 @@ load data local inpath '../../data/files/schema_evolution/complex_struct1_c.txt' insert into table part_change_various_various_struct1 partition(part=1) select * from complex_struct1_c_txt; -explain + explain vectorization detail select insert_num,part,s1,b from part_change_various_various_struct1; select insert_num,part,s1,b from part_change_various_various_struct1; @@ -111,7 +111,7 @@ load data local inpath '../../data/files/schema_evolution/complex_struct2_d.txt' insert into table part_add_various_various_struct2 partition(part=1) select * from complex_struct2_d_txt; -explain +explain vectorization detail select insert_num,part,b,s2 from part_add_various_various_struct2; select insert_num,part,b,s2 from part_add_various_various_struct2; @@ -155,7 +155,7 @@ load data local inpath 
'../../data/files/schema_evolution/complex_struct4_c.txt' insert into table part_add_to_various_various_struct4 partition(part=1) select * from complex_struct4_c_txt; -explain +explain vectorization detail select insert_num,part,b,s3 from part_add_to_various_various_struct4; select insert_num,part,b,s3 from part_add_to_various_various_struct4; diff --git ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_primitive.q ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_primitive.q index d3898a8..b266a67 100644 --- ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_primitive.q +++ ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_primitive.q @@ -1,4 +1,4 @@ -set hive.explain.user=true; +set hive.explain.user=false; set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; @@ -72,7 +72,7 @@ insert into table part_change_various_various_boolean_to_bigint partition(part=1 bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, 'new' FROM schema_evolution_data; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint; @@ -114,7 +114,7 @@ insert into table part_change_various_various_decimal_to_double partition(part=1 double1, double1, double1, double1, double1, double1, double1, double1, double1, double1, double1, 'new' FROM schema_evolution_data_2 WHERE insert_num=111; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double; @@ -138,7 +138,7 @@ alter table part_change_various_various_timestamp replace columns (insert_num in insert into table part_change_various_various_timestamp partition(part=1) SELECT insert_num, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, 'new' FROM schema_evolution_data_2 WHERE insert_num=111; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp; @@ -159,7 +159,7 @@ alter table part_change_various_various_date replace columns (insert_num int, c1 insert into table part_change_various_various_date partition(part=1) SELECT insert_num, date1, date1, date1, date1, 'new' FROM schema_evolution_data_2 WHERE insert_num=111; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date; select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date; @@ -198,7 +198,7 @@ load data local 
inpath '../../data/files/schema_evolution/same_type1_c.txt' over insert into table part_change_same_type_different_params partition(part=2) select * from same_type1_c_txt; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params; select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params; diff --git ql/src/test/queries/clientpositive/schema_evol_orc_vec_table.q ql/src/test/queries/clientpositive/schema_evol_orc_vec_table.q index ffaa07b..866942e 100644 --- ql/src/test/queries/clientpositive/schema_evol_orc_vec_table.q +++ ql/src/test/queries/clientpositive/schema_evol_orc_vec_table.q @@ -1,4 +1,4 @@ -set hive.explain.user=true; +set hive.explain.user=false; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; SET hive.vectorized.use.vectorized.input.format=true; @@ -36,7 +36,7 @@ alter table table_add_int_permute_select add columns(c int); insert into table table_add_int_permute_select VALUES (111, 80000, 'new', 80000); -explain +explain vectorization detail select insert_num,a,b from table_add_int_permute_select; -- SELECT permutation columns to make sure NULL defaulting works right @@ -59,7 +59,7 @@ alter table table_add_int_string_permute_select add columns(c int, d string); insert into table table_add_int_string_permute_select VALUES (111, 80000, 'new', 80000, 'filler'); -explain +explain vectorization detail select insert_num,a,b from table_add_int_string_permute_select; -- SELECT permutation columns to make sure NULL defaulting works right @@ -91,7 +91,7 @@ alter table table_change_string_group_double replace columns (insert_num int, c1 insert into table table_change_string_group_double VALUES (111, 789.321, 789.321, 789.321, 'new'); -explain +explain vectorization detail select insert_num,c1,c2,c3,b from table_change_string_group_double; select insert_num,c1,c2,c3,b from table_change_string_group_double; @@ -158,7 +158,7 @@ insert into table table_change_numeric_group_string_group_multi_ints_string_grou 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group; select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group; @@ -201,7 +201,7 @@ insert into table table_change_numeric_group_string_group_floating_string_group 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group; select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group; diff --git ql/src/test/queries/clientpositive/schema_evol_text_vec_part.q ql/src/test/queries/clientpositive/schema_evol_text_vec_part.q index 6582035..77c863a 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_vec_part.q +++ ql/src/test/queries/clientpositive/schema_evol_text_vec_part.q @@ -1,4 +1,4 @@ -set hive.explain.user=true; +set hive.explain.user=false; set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; @@ -39,7 +39,7 @@ alter table part_add_int_permute_select add columns(c int); insert 
into table part_add_int_permute_select partition(part=1) VALUES (2, 2222, 'new', 3333); -explain +explain vectorization detail select insert_num,part,a,b from part_add_int_permute_select; -- SELECT permutation columns to make sure NULL defaulting works right @@ -62,7 +62,7 @@ alter table part_add_int_string_permute_select add columns(c int, d string); insert into table part_add_int_string_permute_select partition(part=1) VALUES (2, 2222, 'new', 3333, '4444'); -explain +explain vectorization detail select insert_num,part,a,b from part_add_int_string_permute_select; -- SELECT permutation columns to make sure NULL defaulting works right @@ -94,7 +94,7 @@ alter table part_change_string_group_double replace columns (insert_num int, c1 insert into table part_change_string_group_double partition(part=1) SELECT insert_num, double1, double1, double1, 'new' FROM schema_evolution_data WHERE insert_num = 111; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_string_group_double; select insert_num,part,c1,c2,c3,b from part_change_string_group_double; @@ -117,7 +117,7 @@ alter table part_change_date_group_string_group_date_timestamp replace columns(i insert into table part_change_date_group_string_group_date_timestamp partition(part=1) VALUES (111, 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp; @@ -165,7 +165,7 @@ insert into table part_change_numeric_group_string_group_multi_ints_string_group 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group; @@ -208,7 +208,7 @@ insert into table part_change_numeric_group_string_group_floating_string_group p 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group; @@ -250,7 +250,7 @@ insert into table part_change_string_group_string_group_string partition(part=1) 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string; @@ -300,7 +300,7 @@ insert into table part_change_lower_to_higher_numeric_group_tinyint_to_bigint pa 1234.5678, 9876.543, 789.321, 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from 
part_change_lower_to_higher_numeric_group_tinyint_to_bigint; @@ -331,7 +331,7 @@ alter table part_change_lower_to_higher_numeric_group_decimal_to_float replace c insert into table part_change_lower_to_higher_numeric_group_decimal_to_float partition(part=1) VALUES (111, 1234.5678, 9876.543, 1234.5678, 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float; select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float; diff --git ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_complex.q ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_complex.q index e38a01e..7eb72e0 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_complex.q +++ ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_complex.q @@ -1,4 +1,4 @@ -set hive.explain.user=true; +set hive.explain.user=false; set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; @@ -53,7 +53,7 @@ load data local inpath '../../data/files/schema_evolution/complex_struct1_c.txt' insert into table part_change_various_various_struct1 partition(part=1) select * from complex_struct1_c_txt; -explain +explain vectorization detail select insert_num,part,s1,b from part_change_various_various_struct1; select insert_num,part,s1,b from part_change_various_various_struct1; @@ -113,7 +113,7 @@ load data local inpath '../../data/files/schema_evolution/complex_struct2_d.txt' insert into table part_add_various_various_struct2 partition(part=1) select * from complex_struct2_d_txt; -explain +explain vectorization detail select insert_num,part,b,s2 from part_add_various_various_struct2; select insert_num,part,b,s2 from part_add_various_various_struct2; @@ -157,7 +157,7 @@ load data local inpath '../../data/files/schema_evolution/complex_struct4_c.txt' insert into table part_add_to_various_various_struct4 partition(part=1) select * from complex_struct4_c_txt; -explain +explain vectorization detail select insert_num,part,b,s3 from part_add_to_various_various_struct4; select insert_num,part,b,s3 from part_add_to_various_various_struct4; diff --git ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_primitive.q ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_primitive.q index c9d90c3..d5c01cd 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_primitive.q +++ ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_primitive.q @@ -1,4 +1,4 @@ -set hive.explain.user=true; +set hive.explain.user=false; set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; @@ -74,7 +74,7 @@ insert into table part_change_various_various_boolean_to_bigint partition(part=1 bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, 'new' FROM schema_evolution_data; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from 
part_change_various_various_boolean_to_bigint; @@ -116,7 +116,7 @@ insert into table part_change_various_various_decimal_to_double partition(part=1 double1, double1, double1, double1, double1, double1, double1, double1, double1, double1, double1, 'new' FROM schema_evolution_data_2 WHERE insert_num=111; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double; @@ -140,7 +140,7 @@ alter table part_change_various_various_timestamp replace columns (insert_num in insert into table part_change_various_various_timestamp partition(part=1) SELECT insert_num, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, 'new' FROM schema_evolution_data_2 WHERE insert_num=111; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp; @@ -161,7 +161,7 @@ alter table part_change_various_various_date replace columns (insert_num int, c1 insert into table part_change_various_various_date partition(part=1) SELECT insert_num, date1, date1, date1, date1, 'new' FROM schema_evolution_data_2 WHERE insert_num=111; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date; select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date; @@ -200,7 +200,7 @@ load data local inpath '../../data/files/schema_evolution/same_type1_c.txt' over insert into table part_change_same_type_different_params partition(part=2) select * from same_type1_c_txt; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params; select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params; diff --git ql/src/test/queries/clientpositive/schema_evol_text_vec_table.q ql/src/test/queries/clientpositive/schema_evol_text_vec_table.q index 7785f87..bbf03af 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_vec_table.q +++ ql/src/test/queries/clientpositive/schema_evol_text_vec_table.q @@ -1,4 +1,4 @@ -set hive.explain.user=true; +set hive.explain.user=false; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; SET hive.vectorized.use.vectorized.input.format=false; @@ -38,7 +38,7 @@ alter table table_add_int_permute_select add columns(c int); insert into table table_add_int_permute_select VALUES (111, 80000, 'new', 80000); -explain +explain vectorization detail select insert_num,a,b from table_add_int_permute_select; -- SELECT permutation columns to make sure NULL defaulting works right @@ -61,7 +61,7 @@ alter table table_add_int_string_permute_select add columns(c int, d string); insert into table table_add_int_string_permute_select VALUES (111, 80000, 'new', 80000, 'filler'); -explain +explain vectorization detail select insert_num,a,b from table_add_int_string_permute_select; -- SELECT permutation columns to make sure NULL defaulting works right @@ -93,7 +93,7 @@ alter table table_change_string_group_double replace 
columns (insert_num int, c1 insert into table table_change_string_group_double VALUES (111, 789.321, 789.321, 789.321, 'new'); -explain +explain vectorization detail select insert_num,c1,c2,c3,b from table_change_string_group_double; select insert_num,c1,c2,c3,b from table_change_string_group_double; @@ -160,7 +160,7 @@ insert into table table_change_numeric_group_string_group_multi_ints_string_grou 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group; select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group; @@ -203,7 +203,7 @@ insert into table table_change_numeric_group_string_group_floating_string_group 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group; select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group; diff --git ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q index 8ed041b..11df12e 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q +++ ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q @@ -1,4 +1,4 @@ -set hive.explain.user=true; +set hive.explain.user=false; set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; @@ -39,7 +39,7 @@ alter table part_add_int_permute_select add columns(c int); insert into table part_add_int_permute_select partition(part=1) VALUES (2, 2222, 'new', 3333); -explain +explain vectorization detail select insert_num,part,a,b from part_add_int_permute_select; -- SELECT permutation columns to make sure NULL defaulting works right @@ -62,7 +62,7 @@ alter table part_add_int_string_permute_select add columns(c int, d string); insert into table part_add_int_string_permute_select partition(part=1) VALUES (2, 2222, 'new', 3333, '4444'); -explain +explain vectorization detail select insert_num,part,a,b from part_add_int_string_permute_select; -- SELECT permutation columns to make sure NULL defaulting works right @@ -94,7 +94,7 @@ alter table part_change_string_group_double replace columns (insert_num int, c1 insert into table part_change_string_group_double partition(part=1) SELECT insert_num, double1, double1, double1, 'new' FROM schema_evolution_data WHERE insert_num = 111; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_string_group_double; select insert_num,part,c1,c2,c3,b from part_change_string_group_double; @@ -117,7 +117,7 @@ alter table part_change_date_group_string_group_date_timestamp replace columns(i insert into table part_change_date_group_string_group_date_timestamp partition(part=1) VALUES (111, 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from 
part_change_date_group_string_group_date_timestamp; @@ -165,7 +165,7 @@ insert into table part_change_numeric_group_string_group_multi_ints_string_group 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group; @@ -208,7 +208,7 @@ insert into table part_change_numeric_group_string_group_floating_string_group p 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group; @@ -250,7 +250,7 @@ insert into table part_change_string_group_string_group_string partition(part=1) 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string; @@ -300,7 +300,7 @@ insert into table part_change_lower_to_higher_numeric_group_tinyint_to_bigint pa 1234.5678, 9876.543, 789.321, 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint; @@ -331,7 +331,7 @@ alter table part_change_lower_to_higher_numeric_group_decimal_to_float replace c insert into table part_change_lower_to_higher_numeric_group_decimal_to_float partition(part=1) VALUES (111, 1234.5678, 9876.543, 1234.5678, 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float; select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float; diff --git ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q index b9d035e..dfd55d9 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q +++ ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q @@ -1,4 +1,4 @@ -set hive.explain.user=true; +set hive.explain.user=false; set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; @@ -54,7 +54,7 @@ load data local inpath '../../data/files/schema_evolution/complex_struct1_c.txt' insert into table part_change_various_various_struct1 partition(part=1) select * from complex_struct1_c_txt; -explain +explain vectorization detail select insert_num,part,s1,b from part_change_various_various_struct1; select insert_num,part,s1,b from part_change_various_various_struct1; @@ -114,7 +114,7 @@ load data local inpath '../../data/files/schema_evolution/complex_struct2_d.txt' insert into table part_add_various_various_struct2 
partition(part=1) select * from complex_struct2_d_txt; -explain +explain vectorization detail select insert_num,part,b,s2 from part_add_various_various_struct2; select insert_num,part,b,s2 from part_add_various_various_struct2; @@ -158,7 +158,7 @@ load data local inpath '../../data/files/schema_evolution/complex_struct4_c.txt' insert into table part_add_to_various_various_struct4 partition(part=1) select * from complex_struct4_c_txt; -explain +explain vectorization detail select insert_num,part,b,s3 from part_add_to_various_various_struct4; select insert_num,part,b,s3 from part_add_to_various_various_struct4; diff --git ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_primitive.q ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_primitive.q index f5b0485..d71c6b8 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_primitive.q +++ ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_primitive.q @@ -1,4 +1,4 @@ -set hive.explain.user=true; +set hive.explain.user=false; set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; @@ -74,7 +74,7 @@ insert into table part_change_various_various_boolean_to_bigint partition(part=1 bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, 'new' FROM schema_evolution_data; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint; @@ -116,7 +116,7 @@ insert into table part_change_various_various_decimal_to_double partition(part=1 double1, double1, double1, double1, double1, double1, double1, double1, double1, double1, double1, 'new' FROM schema_evolution_data_2 WHERE insert_num=111; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double; @@ -140,7 +140,7 @@ alter table part_change_various_various_timestamp replace columns (insert_num in insert into table part_change_various_various_timestamp partition(part=1) SELECT insert_num, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, 'new' FROM schema_evolution_data_2 WHERE insert_num=111; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp; @@ -161,7 +161,7 @@ alter table part_change_various_various_date replace columns (insert_num int, c1 insert into table part_change_various_various_date partition(part=1) SELECT insert_num, date1, date1, date1, 
date1, 'new' FROM schema_evolution_data_2 WHERE insert_num=111; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date; select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date; @@ -200,7 +200,7 @@ load data local inpath '../../data/files/schema_evolution/same_type1_c.txt' over insert into table part_change_same_type_different_params partition(part=2) select * from same_type1_c_txt; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params; select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params; diff --git ql/src/test/queries/clientpositive/schema_evol_text_vecrow_table.q ql/src/test/queries/clientpositive/schema_evol_text_vecrow_table.q index c8d5458..d4209a5 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_vecrow_table.q +++ ql/src/test/queries/clientpositive/schema_evol_text_vecrow_table.q @@ -1,4 +1,4 @@ -set hive.explain.user=true; +set hive.explain.user=false; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; SET hive.vectorized.use.vectorized.input.format=false; @@ -38,7 +38,7 @@ alter table table_add_int_permute_select add columns(c int); insert into table table_add_int_permute_select VALUES (111, 80000, 'new', 80000); -explain +explain vectorization detail select insert_num,a,b from table_add_int_permute_select; -- SELECT permutation columns to make sure NULL defaulting works right @@ -61,7 +61,7 @@ alter table table_add_int_string_permute_select add columns(c int, d string); insert into table table_add_int_string_permute_select VALUES (111, 80000, 'new', 80000, 'filler'); -explain +explain vectorization detail select insert_num,a,b from table_add_int_string_permute_select; -- SELECT permutation columns to make sure NULL defaulting works right @@ -93,7 +93,7 @@ alter table table_change_string_group_double replace columns (insert_num int, c1 insert into table table_change_string_group_double VALUES (111, 789.321, 789.321, 789.321, 'new'); -explain +explain vectorization detail select insert_num,c1,c2,c3,b from table_change_string_group_double; select insert_num,c1,c2,c3,b from table_change_string_group_double; @@ -160,7 +160,7 @@ insert into table table_change_numeric_group_string_group_multi_ints_string_grou 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group; select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group; @@ -203,7 +203,7 @@ insert into table table_change_numeric_group_string_group_floating_string_group 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group; select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group; diff --git ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q index cef4e4c..449bea2 100644 --- ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q +++ 
ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q @@ -32,7 +32,7 @@ insert into table count_case_groupby values ('key1', true),('key2', false),('key set hive.vectorized.adaptor.usage.mode=none; -explain +explain vectorization expression select c2 regexp 'val', c4 regexp 'val', @@ -45,7 +45,7 @@ select (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1; -explain +explain vectorization expression select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), @@ -58,7 +58,7 @@ select regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1 limit 1; -explain +explain vectorization expression select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), @@ -74,7 +74,7 @@ from varchar_udf_1 limit 1; set hive.vectorized.adaptor.usage.mode=chosen; -explain +explain vectorization expression select c2 regexp 'val', c4 regexp 'val', @@ -87,7 +87,7 @@ select (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1; -explain +explain vectorization expression select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), @@ -100,7 +100,7 @@ select regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1 limit 1; -explain +explain vectorization expression select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), @@ -116,11 +116,11 @@ from varchar_udf_1 limit 1; set hive.vectorized.adaptor.usage.mode=none; -EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF; SELECT POWER(key, 2) FROM DECIMAL_UDF; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), @@ -135,11 +135,11 @@ FROM DECIMAL_UDF WHERE key = 10; set hive.vectorized.adaptor.usage.mode=chosen; -EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF; SELECT POWER(key, 2) FROM DECIMAL_UDF; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), @@ -155,14 +155,14 @@ FROM DECIMAL_UDF WHERE key = 10; set hive.vectorized.adaptor.usage.mode=none; -explain +explain vectorization expression SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key; SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key; set hive.vectorized.adaptor.usage.mode=chosen; -explain +explain vectorization expression SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key; SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key; diff --git ql/src/test/queries/clientpositive/vector_aggregate_9.q ql/src/test/queries/clientpositive/vector_aggregate_9.q index ce6f0ff..04fdeec 100644 --- ql/src/test/queries/clientpositive/vector_aggregate_9.q +++ ql/src/test/queries/clientpositive/vector_aggregate_9.q @@ -1,5 +1,6 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; create table vectortab2k( t tinyint, @@ -38,7 +39,7 @@ STORED AS ORC; INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k; -explain +explain vectorization expression select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc; -- SORT_QUERY_RESULTS
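Many of the .q changes in this patch pair the new explain clause with set hive.fetch.task.conversion=none;. The apparent reason (a reading of the recurring pattern, not something the patch states): with fetch-task conversion enabled, a simple query can be answered by a client-side fetch task that never launches a map task, so there is no vectorized plan for EXPLAIN VECTORIZATION to report on; setting the option to none forces a real execution task that the vectorizer can plan. A minimal sketch of the preamble, assuming an ORC table t with a decimal column dc:

  set hive.vectorized.execution.enabled=true;
  -- force a genuine map task rather than a client-side fetch
  set hive.fetch.task.conversion=none;
  explain vectorization expression
  select min(dc), max(dc), sum(dc), avg(dc) from t;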
diff --git ql/src/test/queries/clientpositive/vector_aggregate_without_gby.q ql/src/test/queries/clientpositive/vector_aggregate_without_gby.q index 8a63635..64440e3 100644 --- ql/src/test/queries/clientpositive/vector_aggregate_without_gby.q +++ ql/src/test/queries/clientpositive/vector_aggregate_without_gby.q @@ -1,5 +1,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=true; +set hive.fetch.task.conversion=none; + create table testvec(id int, dt int, greg_dt string) stored as orc; insert into table testvec values @@ -12,5 +14,5 @@ values (7,20150404, '2015-04-04'); set hive.vectorized.execution.enabled=true; set hive.map.aggr=true; -explain select max(dt), max(greg_dt) from testvec where id=5; +explain vectorization select max(dt), max(greg_dt) from testvec where id=5; select max(dt), max(greg_dt) from testvec where id=5;
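The updated tests exercise several verbosity levels of the new clause; roughly, plain explain vectorization prints a summary, expression and detail add progressively more per-operator and per-expression information, and the only modifier suppresses the ordinary plan output. A compact sketch of the variants as they are used across these files (table t and column a are stand-ins):

  explain vectorization select max(a) from t;                  -- summary-level output
  explain vectorization expression select max(a) from t;       -- adds vector expression details
  explain vectorization detail select max(a) from t;           -- most verbose form
  explain vectorization only expression select max(a) from t;  -- vectorization info without the rest of the plan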
diff --git ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q index 2077f8e..3c53853 100644 --- ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q +++ ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q @@ -1,7 +1,7 @@ set hive.mapred.mode=nonstrict; -set hive.explain.user=true; +set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; -; +set hive.fetch.task.conversion=none; set hive.exec.reducers.max = 1; @@ -23,7 +23,7 @@ set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; set hive.auto.convert.sortmerge.join=true; -- The join is being performed as part of sub-query. It should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1; @@ -33,7 +33,7 @@ select count(*) from ( ) subq1; -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from ( select key, count(*) from @@ -54,7 +54,7 @@ select count(*) from -- A join is being performed across different sub-queries, where a join is being performed in each of them. -- Each sub-query should be converted to a sort-merge join. -explain +explain vectorization expression select src1.key, src1.cnt1, src2.cnt1 from ( select key, count(*) as cnt1 from @@ -89,7 +89,7 @@ on src1.key = src2.key; -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join. -explain +explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -104,7 +104,7 @@ select count(*) from -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join, although there is more than one level of sub-query -explain +explain vectorization expression select count(*) from ( select * from @@ -129,7 +129,7 @@ select count(*) from -- Both the tables are nested sub-queries i.e. more than one level of sub-query. -- The join should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from ( select * from @@ -169,7 +169,7 @@ select count(*) from -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key -- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one -- item, but that is not part of the join key. -explain +explain vectorization expression select count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 join @@ -184,7 +184,7 @@ select count(*) from -- Since the join key is modified by the sub-query, neither sort-merge join nor bucketized map-side -- join should be performed -explain +explain vectorization expression select count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 join @@ -199,7 +199,7 @@ select count(*) from -- One of the tables is a sub-query and the other is not. -- It should be converted to a sort-merge join. -explain +explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key; @@ -210,7 +210,7 @@ select count(*) from -- There are more than 2 inputs to the join, all of them being sub-queries. -- It should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -231,7 +231,7 @@ select count(*) from -- The join is being performed on a nested sub-query, and an aggregation is performed after that. -- The join should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from ( select subq2.key as key, subq2.value as value1, b.value as value2 from ( @@ -261,7 +261,7 @@ CREATE TABLE dest2(key int, val1 string, val2 string); -- The join is followed by a multi-table insert. It should be converted to -- a sort-merge join -explain +explain vectorization expression from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 @@ -282,7 +282,7 @@ CREATE TABLE dest2(key int, cnt int); -- The join is followed by a multi-table insert, and one of the inserts involves a reducer.
-- It should be converted to a sort-merge join -explain +explain vectorization expression from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 diff --git ql/src/test/queries/clientpositive/vector_between_columns.q ql/src/test/queries/clientpositive/vector_between_columns.q index 41f9243..920b692 100644 --- ql/src/test/queries/clientpositive/vector_between_columns.q +++ ql/src/test/queries/clientpositive/vector_between_columns.q @@ -24,13 +24,13 @@ create table TSINT stored as orc AS SELECT * FROM TSINT_txt; create table TINT stored as orc AS SELECT * FROM TINT_txt; -explain +explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint; select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint; -explain +explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint; select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint; diff --git ql/src/test/queries/clientpositive/vector_between_in.q ql/src/test/queries/clientpositive/vector_between_in.q index 989d8dd..c336829 100644 --- ql/src/test/queries/clientpositive/vector_between_in.q +++ ql/src/test/queries/clientpositive/vector_between_in.q @@ -1,24 +1,25 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; CREATE TABLE decimal_date_test STORED AS ORC AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, CAST(CAST((CAST(cint AS BIGINT) *ctinyint) AS TIMESTAMP) AS DATE) AS cdate FROM alltypesorc ORDER BY cdate; -EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate; +EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate; -EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)); +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)); -EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1; +EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1; -EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568); +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568); -EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate; +EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate; -EXPLAIN SELECT cdate 
FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate; +EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate; -EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1; +EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1; -EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351; +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351; SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate; @@ -40,13 +41,13 @@ SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 439 -- projections -EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0; +EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0; -EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0; +EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0; -EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0; +EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0; -EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0; +EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0; SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0; diff --git ql/src/test/queries/clientpositive/vector_binary_join_groupby.q ql/src/test/queries/clientpositive/vector_binary_join_groupby.q index 1d99e34..d19dbc3 100644 --- ql/src/test/queries/clientpositive/vector_binary_join_groupby.q +++ ql/src/test/queries/clientpositive/vector_binary_join_groupby.q @@ -4,6 +4,7 @@ SET hive.auto.convert.join=true; SET hive.auto.convert.join.noconditionaltask=true; SET hive.auto.convert.join.noconditionaltask.size=1000000000; SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; DROP TABLE over1k; DROP TABLE hundredorc; @@ -40,14 +41,14 @@ STORED AS ORC; INSERT INTO TABLE hundredorc SELECT * FROM over1k LIMIT 100; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT sum(hash(*)) FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin; SELECT sum(hash(*)) FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT count(*), bin FROM hundredorc GROUP 
BY bin; @@ -58,6 +59,6 @@ GROUP BY bin; -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin). -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT t1.i, t1.bin, t2.bin FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_bround.q ql/src/test/queries/clientpositive/vector_bround.q index deea00b..ffa3ad3 100644 --- ql/src/test/queries/clientpositive/vector_bround.q +++ ql/src/test/queries/clientpositive/vector_bround.q @@ -1,5 +1,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=true; +SET hive.fetch.task.conversion=none; + create table test_vector_bround(v0 double, v1 double) stored as orc; insert into table test_vector_bround values @@ -12,5 +14,5 @@ values (2.51, 1.251), (3.51, 1.351); set hive.vectorized.execution.enabled=true; -explain select bround(v0), bround(v1, 1) from test_vector_bround; +explain vectorization select bround(v0), bround(v1, 1) from test_vector_bround; select bround(v0), bround(v1, 1) from test_vector_bround; diff --git ql/src/test/queries/clientpositive/vector_bucket.q ql/src/test/queries/clientpositive/vector_bucket.q index 39436c9..b67592e 100644 --- ql/src/test/queries/clientpositive/vector_bucket.q +++ ql/src/test/queries/clientpositive/vector_bucket.q @@ -2,12 +2,13 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; set hive.support.concurrency=true; +set hive.fetch.task.conversion=none; CREATE TABLE non_orc_table(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS sequencefile; -explain +explain vectorization expression insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three'); select a, b from non_orc_table order by a; insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three'); select a, b from non_orc_table order by a; diff --git ql/src/test/queries/clientpositive/vector_cast_constant.q ql/src/test/queries/clientpositive/vector_cast_constant.q index 94bee09..aac7f92 100644 --- ql/src/test/queries/clientpositive/vector_cast_constant.q +++ ql/src/test/queries/clientpositive/vector_cast_constant.q @@ -1,7 +1,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; - +set hive.fetch.task.conversion=none; DROP TABLE over1k; DROP TABLE over1korc; @@ -38,7 +38,7 @@ STORED AS ORC; INSERT INTO TABLE over1korc SELECT * FROM over1k; -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, diff --git ql/src/test/queries/clientpositive/vector_char_2.q ql/src/test/queries/clientpositive/vector_char_2.q index f1bb75b..5520ddd 100644 --- ql/src/test/queries/clientpositive/vector_char_2.q +++ ql/src/test/queries/clientpositive/vector_char_2.q @@ -1,6 +1,8 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + drop table char_2; create table char_2 ( @@ -16,7 +18,7 @@ group by value order by value asc limit 5; -explain select value, sum(cast(key as int)), count(*) numrows +explain vectorization expression select value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value asc @@ -35,7 +37,7 @@ group by value order by value desc limit 5; -explain select value, sum(cast(key as int)), count(*) numrows +explain vectorization expression select value, sum(cast(key as int)), 
count(*) numrows from char_2 group by value order by value desc diff --git ql/src/test/queries/clientpositive/vector_char_4.q ql/src/test/queries/clientpositive/vector_char_4.q index 06f1d2b..edc59cd 100644 --- ql/src/test/queries/clientpositive/vector_char_4.q +++ ql/src/test/queries/clientpositive/vector_char_4.q @@ -1,5 +1,6 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; drop table if exists vectortab2k; drop table if exists vectortab2korc; @@ -44,7 +45,7 @@ INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k; drop table if exists char_lazy_binary_columnar; create table char_lazy_binary_columnar(ct char(10), csi char(10), ci char(20), cb char(30), cf char(20), cd char(20), cs char(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile; -explain +explain vectorization expression insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc; -- insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc; diff --git ql/src/test/queries/clientpositive/vector_char_cast.q ql/src/test/queries/clientpositive/vector_char_cast.q index bc78d51..c7d3c3c 100644 --- ql/src/test/queries/clientpositive/vector_char_cast.q +++ ql/src/test/queries/clientpositive/vector_char_cast.q @@ -1,3 +1,5 @@ +set hive.fetch.task.conversion=none; + create table s1(id smallint) stored as orc; insert into table s1 values (1000),(1001),(1002),(1003),(1000); diff --git ql/src/test/queries/clientpositive/vector_char_mapjoin1.q ql/src/test/queries/clientpositive/vector_char_mapjoin1.q index 58a73be..f5c05a4 100644 --- ql/src/test/queries/clientpositive/vector_char_mapjoin1.q +++ ql/src/test/queries/clientpositive/vector_char_mapjoin1.q @@ -37,21 +37,21 @@ create table char_join1_vc2_orc stored as orc as select * from char_join1_vc2; create table char_join1_str_orc stored as orc as select * from char_join1_str; -- Join char with same length char -explain select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1; +explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1; -- SORT_QUERY_RESULTS select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1; -- Join char with different length char -explain select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1; +explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1; -- SORT_QUERY_RESULTS select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1; -- Join char with string -explain select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1; +explain vectorization expression select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1; -- SORT_QUERY_RESULTS diff --git ql/src/test/queries/clientpositive/vector_char_simple.q ql/src/test/queries/clientpositive/vector_char_simple.q index a921140..c315241 100644 --- ql/src/test/queries/clientpositive/vector_char_simple.q +++ ql/src/test/queries/clientpositive/vector_char_simple.q @@ -1,5 +1,7 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + drop table char_2; create table char_2 ( @@ -14,7 +16,7 @@ from src order by key asc limit 5; -explain select 
key, value +explain vectorization only select key, value from char_2 order by key asc limit 5; @@ -30,7 +32,7 @@ from src order by key desc limit 5; -explain select key, value +explain vectorization only select key, value from char_2 order by key desc limit 5; @@ -49,7 +51,7 @@ create table char_3 ( field char(12) ) stored as orc; -explain +explain vectorization only operator insert into table char_3 select cint from alltypesorc limit 10; insert into table char_3 select cint from alltypesorc limit 10; diff --git ql/src/test/queries/clientpositive/vector_coalesce.q ql/src/test/queries/clientpositive/vector_coalesce.q index cfba7be..11296e1 100644 --- ql/src/test/queries/clientpositive/vector_coalesce.q +++ ql/src/test/queries/clientpositive/vector_coalesce.q @@ -1,10 +1,11 @@ set hive.stats.fetch.column.stats=true; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c +EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c FROM alltypesorc WHERE (cdouble IS NULL) ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c @@ -16,7 +17,7 @@ WHERE (cdouble IS NULL) ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c LIMIT 10; -EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c +EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c FROM alltypesorc WHERE (ctinyint IS NULL) ORDER BY ctinyint, cdouble, cint, c @@ -28,7 +29,7 @@ WHERE (ctinyint IS NULL) ORDER BY ctinyint, cdouble, cint, c LIMIT 10; -EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c +EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c @@ -40,7 +41,7 @@ WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10; -EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c +EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL ORDER BY ctimestamp1, ctimestamp2, c @@ -52,7 +53,7 @@ WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL ORDER BY ctimestamp1, ctimestamp2, c LIMIT 10; -EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c +EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c @@ -64,7 +65,7 @@ WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10; -EXPLAIN SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c +EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c FROM alltypesorc WHERE cbigint IS NULL LIMIT 10; diff --git ql/src/test/queries/clientpositive/vector_coalesce_2.q ql/src/test/queries/clientpositive/vector_coalesce_2.q index c847e20..ea45ddd 100644 --- ql/src/test/queries/clientpositive/vector_coalesce_2.q +++ ql/src/test/queries/clientpositive/vector_coalesce_2.q @@ -7,7 +7,7 @@ create table str_str_orc (str1 string, str2 string) stored as orc; insert into table str_str_orc values (null, "X"), 
("0", "X"), ("1", "X"), (null, "y"); -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc @@ -18,7 +18,7 @@ SELECT from str_str_orc GROUP BY str2; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT COALESCE(str1, 0) as result from str_str_orc; @@ -27,7 +27,7 @@ from str_str_orc; SET hive.vectorized.execution.enabled=true; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc @@ -38,7 +38,7 @@ SELECT from str_str_orc GROUP BY str2; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT COALESCE(str1, 0) as result from str_str_orc; diff --git ql/src/test/queries/clientpositive/vector_complex_join.q ql/src/test/queries/clientpositive/vector_complex_join.q index 30f38b1..c6926cb 100644 --- ql/src/test/queries/clientpositive/vector_complex_join.q +++ ql/src/test/queries/clientpositive/vector_complex_join.q @@ -10,7 +10,7 @@ set hive.fetch.task.conversion=none; CREATE TABLE test (a INT, b MAP) STORED AS ORC; INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1; -explain +explain vectorization expression select * from alltypesorc join test where alltypesorc.cint=test.a; select * from alltypesorc join test where alltypesorc.cint=test.a; @@ -23,7 +23,7 @@ INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1; CREATE TABLE test2b (a INT) STORED AS ORC; INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4); -explain +explain vectorization expression select * from test2b join test2a on test2b.a = test2a.a[1]; select * from test2b join test2a on test2b.a = test2a.a[1]; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_count.q ql/src/test/queries/clientpositive/vector_count.q index 341db74..74b24cc 100644 --- ql/src/test/queries/clientpositive/vector_count.q +++ ql/src/test/queries/clientpositive/vector_count.q @@ -12,15 +12,15 @@ create table abcd stored as orc as select * from abcd_txt; select * from abcd; set hive.map.aggr=true; -explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a; +explain vectorization expression select a, count(distinct b), count(distinct c), sum(d) from abcd group by a; select a, count(distinct b), count(distinct c), sum(d) from abcd group by a; -explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd; +explain vectorization expression select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd; select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), 
count(distinct a,b,c,d) from abcd; set hive.map.aggr=false; -explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a; +explain vectorization expression select a, count(distinct b), count(distinct c), sum(d) from abcd group by a; select a, count(distinct b), count(distinct c), sum(d) from abcd group by a; -explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd; +explain vectorization expression select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd; select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd; diff --git ql/src/test/queries/clientpositive/vector_count_distinct.q ql/src/test/queries/clientpositive/vector_count_distinct.q index ec72079..72ca3fa 100644 --- ql/src/test/queries/clientpositive/vector_count_distinct.q +++ ql/src/test/queries/clientpositive/vector_count_distinct.q @@ -1,6 +1,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; create table web_sales_txt ( @@ -104,7 +105,7 @@ select ws_sold_date_sk, ws_sold_time_sk, ws_ship_date_sk, ws_item_sk, ------------------------------------------------------------------------------------------ -explain +explain vectorization expression select count(distinct ws_order_number) from web_sales; select count(distinct ws_order_number) from web_sales; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_data_types.q ql/src/test/queries/clientpositive/vector_data_types.q index c7e0d1b..d3ee19b 100644 --- ql/src/test/queries/clientpositive/vector_data_types.q +++ ql/src/test/queries/clientpositive/vector_data_types.q @@ -1,5 +1,6 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; +set hive.fetch.task.conversion=none; DROP TABLE over1k; DROP TABLE over1korc; @@ -38,7 +39,7 @@ INSERT INTO TABLE over1korc SELECT * FROM over1k; SET hive.vectorized.execution.enabled=false; -EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20; +EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20; SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20; @@ -47,7 +48,7 @@ FROM (SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, s SET hive.vectorized.execution.enabled=true; -EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20; +EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, bo, 
s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20; SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20; diff --git ql/src/test/queries/clientpositive/vector_date_1.q ql/src/test/queries/clientpositive/vector_date_1.q index 072ed5c..7e8768c 100644 --- ql/src/test/queries/clientpositive/vector_date_1.q +++ ql/src/test/queries/clientpositive/vector_date_1.q @@ -1,8 +1,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; - set hive.vectorized.execution.enabled=true; -set hive.fetch.task.conversion=minimal; +set hive.fetch.task.conversion=none; drop table if exists vector_date_1; create table vector_date_1 (dt1 date, dt2 date) stored as orc; diff --git ql/src/test/queries/clientpositive/vector_decimal_1.q ql/src/test/queries/clientpositive/vector_decimal_1.q index 8a1503f..e797892 100644 --- ql/src/test/queries/clientpositive/vector_decimal_1.q +++ ql/src/test/queries/clientpositive/vector_decimal_1.q @@ -1,7 +1,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; -set hive.fetch.task.conversion=minimal; +set hive.fetch.task.conversion=none; drop table if exists decimal_1; diff --git ql/src/test/queries/clientpositive/vector_decimal_10_0.q ql/src/test/queries/clientpositive/vector_decimal_10_0.q index 596b2bd..14650f9 100644 --- ql/src/test/queries/clientpositive/vector_decimal_10_0.q +++ ql/src/test/queries/clientpositive/vector_decimal_10_0.q @@ -1,7 +1,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; -set hive.fetch.task.conversion=minimal; +set hive.fetch.task.conversion=none; DROP TABLE IF EXISTS decimal_txt; DROP TABLE IF EXISTS `decimal`; diff --git ql/src/test/queries/clientpositive/vector_decimal_2.q ql/src/test/queries/clientpositive/vector_decimal_2.q index f1477ce..e00fefe 100644 --- ql/src/test/queries/clientpositive/vector_decimal_2.q +++ ql/src/test/queries/clientpositive/vector_decimal_2.q @@ -1,7 +1,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; -set hive.fetch.task.conversion=minimal; +set hive.fetch.task.conversion=none; drop table decimal_2; diff --git ql/src/test/queries/clientpositive/vector_decimal_3.q ql/src/test/queries/clientpositive/vector_decimal_3.q index 9fa5d6f..c23a652 100644 --- ql/src/test/queries/clientpositive/vector_decimal_3.q +++ ql/src/test/queries/clientpositive/vector_decimal_3.q @@ -1,6 +1,6 @@ set hive.mapred.mode=nonstrict; SET hive.vectorized.execution.enabled=true; -set hive.fetch.task.conversion=minimal; +set hive.fetch.task.conversion=none; DROP TABLE IF EXISTS DECIMAL_3_txt; DROP TABLE IF EXISTS DECIMAL_3; diff --git ql/src/test/queries/clientpositive/vector_decimal_4.q ql/src/test/queries/clientpositive/vector_decimal_4.q index 29c9875..0c34074 100644 --- ql/src/test/queries/clientpositive/vector_decimal_4.q +++ ql/src/test/queries/clientpositive/vector_decimal_4.q @@ -1,6 +1,6 @@ set hive.mapred.mode=nonstrict; SET hive.vectorized.execution.enabled=true; -set hive.fetch.task.conversion=minimal; +set hive.fetch.task.conversion=none; DROP TABLE IF EXISTS DECIMAL_4_1; DROP TABLE IF EXISTS DECIMAL_4_2; diff --git ql/src/test/queries/clientpositive/vector_decimal_5.q ql/src/test/queries/clientpositive/vector_decimal_5.q index 7cf604d..f5de13b 100644 --- ql/src/test/queries/clientpositive/vector_decimal_5.q +++ ql/src/test/queries/clientpositive/vector_decimal_5.q @@ -1,6 +1,6 @@ set hive.mapred.mode=nonstrict; SET 
hive.vectorized.execution.enabled=true; -set hive.fetch.task.conversion=minimal; +set hive.fetch.task.conversion=none; DROP TABLE IF EXISTS DECIMAL_5_txt; DROP TABLE IF EXISTS DECIMAL_5; diff --git ql/src/test/queries/clientpositive/vector_decimal_6.q ql/src/test/queries/clientpositive/vector_decimal_6.q index 1d0c3ae..fe145e6 100644 --- ql/src/test/queries/clientpositive/vector_decimal_6.q +++ ql/src/test/queries/clientpositive/vector_decimal_6.q @@ -1,6 +1,6 @@ set hive.mapred.mode=nonstrict; SET hive.vectorized.execution.enabled=true; -set hive.fetch.task.conversion=minimal; +set hive.fetch.task.conversion=none; DROP TABLE IF EXISTS DECIMAL_6_1_txt; DROP TABLE IF EXISTS DECIMAL_6_1; diff --git ql/src/test/queries/clientpositive/vector_decimal_aggregate.q ql/src/test/queries/clientpositive/vector_decimal_aggregate.q index 552a564..843b57e 100644 --- ql/src/test/queries/clientpositive/vector_decimal_aggregate.q +++ ql/src/test/queries/clientpositive/vector_decimal_aggregate.q @@ -1,4 +1,6 @@ set hive.explain.user=false; +set hive.fetch.task.conversion=none; + CREATE TABLE decimal_vgby STORED AS ORC AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, @@ -10,7 +12,7 @@ SET hive.vectorized.execution.enabled=true; -- SORT_QUERY_RESULTS -- First only do simple aggregations that output primitives only -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION EXPRESSION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) FROM decimal_vgby @@ -24,7 +26,7 @@ SELECT cint, HAVING COUNT(*) > 1; -- Now add the others... -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION EXPRESSION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) FROM decimal_vgby diff --git ql/src/test/queries/clientpositive/vector_decimal_cast.q ql/src/test/queries/clientpositive/vector_decimal_cast.q index eb0e75c..fc8861e 100644 --- ql/src/test/queries/clientpositive/vector_decimal_cast.q +++ ql/src/test/queries/clientpositive/vector_decimal_cast.q @@ -1,6 +1,7 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; -EXPLAIN SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10; +EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10; SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10; diff --git ql/src/test/queries/clientpositive/vector_decimal_expressions.q ql/src/test/queries/clientpositive/vector_decimal_expressions.q index 
index 33d0747..864e552 100644
--- ql/src/test/queries/clientpositive/vector_decimal_expressions.q
+++ ql/src/test/queries/clientpositive/vector_decimal_expressions.q
@@ -1,11 +1,12 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
 CREATE TABLE decimal_test STORED AS ORC AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2 FROM alltypesorc;
 
 SET hive.vectorized.execution.enabled=true;
-EXPLAIN SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL
+EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL
 ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14
 LIMIT 10;
diff --git ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q
index 3007239..dac0317 100644
--- ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q
+++ ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q
@@ -4,6 +4,7 @@ SET hive.auto.convert.join=true;
 SET hive.auto.convert.join.noconditionaltask=true;
 SET hive.auto.convert.join.noconditionaltask.size=1000000000;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 CREATE TABLE over1k(t tinyint,
 si smallint,
@@ -26,7 +27,7 @@ INSERT INTO TABLE t1 select dec from over1k;
 CREATE TABLE t2(dec decimal(4,0)) STORED AS ORC;
 INSERT INTO TABLE t2 select dec from over1k;
 
-explain
+explain vectorization expression
 select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec);
 
 -- SORT_QUERY_RESULTS
diff --git ql/src/test/queries/clientpositive/vector_decimal_math_funcs.q ql/src/test/queries/clientpositive/vector_decimal_math_funcs.q
index 4ebde6a..08e1e0f 100644
--- ql/src/test/queries/clientpositive/vector_decimal_math_funcs.q
+++ ql/src/test/queries/clientpositive/vector_decimal_math_funcs.q
@@ -1,11 +1,13 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
 
 CREATE TABLE decimal_test STORED AS ORC AS SELECT cbigint, cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2 FROM alltypesorc;
 
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 -- Test math functions in vectorized mode to verify they run correctly end-to-end.
-explain
+explain vectorization expression
 select
 cdecimal1
 ,Round(cdecimal1, 2)
diff --git ql/src/test/queries/clientpositive/vector_decimal_precision.q ql/src/test/queries/clientpositive/vector_decimal_precision.q
index cc3fb63..97616f6 100644
--- ql/src/test/queries/clientpositive/vector_decimal_precision.q
+++ ql/src/test/queries/clientpositive/vector_decimal_precision.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
 DROP TABLE IF EXISTS DECIMAL_PRECISION_txt;
 DROP TABLE IF EXISTS DECIMAL_PRECISION;
@@ -26,7 +26,7 @@ SELECT dec, dec / 9 FROM DECIMAL_PRECISION ORDER BY dec;
 SELECT dec, dec / 27 FROM DECIMAL_PRECISION ORDER BY dec;
 SELECT dec, dec * dec FROM DECIMAL_PRECISION ORDER BY dec;
 
-EXPLAIN SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION;
+EXPLAIN VECTORIZATION EXPRESSION SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION;
 SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION;
 
 SELECT dec * cast('12345678901234567890.12345678' as decimal(38,18)) FROM DECIMAL_PRECISION LIMIT 1;
diff --git ql/src/test/queries/clientpositive/vector_decimal_round.q ql/src/test/queries/clientpositive/vector_decimal_round.q
index bf83163..ba20fef 100644
--- ql/src/test/queries/clientpositive/vector_decimal_round.q
+++ ql/src/test/queries/clientpositive/vector_decimal_round.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
 create table decimal_tbl_txt (dec decimal(10,0))
 ROW FORMAT DELIMITED
@@ -12,12 +12,12 @@ insert into table decimal_tbl_txt values(101);
 
 select * from decimal_tbl_txt;
 
-explain
+explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_txt order by dec;
 
 select dec, round(dec, -1) from decimal_tbl_txt order by dec;
 
-explain
+explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_txt order by round(dec, -1);
 
 select dec, round(dec, -1) from decimal_tbl_txt order by round(dec, -1);
@@ -29,12 +29,12 @@ insert into table decimal_tbl_rc values(101);
 
 select * from decimal_tbl_rc;
 
-explain
+explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_rc order by dec;
 
 select dec, round(dec, -1) from decimal_tbl_rc order by dec;
 
-explain
+explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_rc order by round(dec, -1);
 
 select dec, round(dec, -1) from decimal_tbl_rc order by round(dec, -1);
@@ -46,12 +46,12 @@ insert into table decimal_tbl_orc values(101);
 
 select * from decimal_tbl_orc;
 
-explain
+explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_orc order by dec;
 
 select dec, round(dec, -1) from decimal_tbl_orc order by dec;
 
-explain
+explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_orc order by round(dec, -1);
 
 select dec, round(dec, -1) from decimal_tbl_orc order by round(dec, -1);
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/vector_decimal_round_2.q ql/src/test/queries/clientpositive/vector_decimal_round_2.q
index 0020325..7afc780 100644
--- ql/src/test/queries/clientpositive/vector_decimal_round_2.q
+++ ql/src/test/queries/clientpositive/vector_decimal_round_2.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
 create table decimal_tbl_1_orc (dec decimal(38,18))
 STORED AS ORC;
@@ -19,7 +19,7 @@ select * from decimal_tbl_1_orc;
 -- round(1.0/0.0, 0), round(power(-1.0,0.5), 0)
 -- FROM decimal_tbl_1_orc ORDER BY dec;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
 round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3),
 round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4),
@@ -39,7 +39,7 @@ insert into table decimal_tbl_2_orc values(125.315, -125.315);
 
 select * from decimal_tbl_2_orc;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
 round(pos) as p, round(pos, 0), round(pos, 1), round(pos, 2), round(pos, 3), round(pos, 4),
@@ -65,7 +65,7 @@ insert into table decimal_tbl_3_orc values(3.141592653589793);
 
 select * from decimal_tbl_3_orc;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
 round(dec, -15) as d, round(dec, -16),
 round(dec, -13), round(dec, -14),
@@ -113,7 +113,7 @@ insert into table decimal_tbl_4_orc values(1809242.3151111344, -1809242.31511113
 
 select * from decimal_tbl_4_orc;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT round(pos, 9) as p, round(neg, 9), round(1809242.3151111344BD, 9), round(-1809242.3151111344BD, 9)
 FROM decimal_tbl_4_orc ORDER BY p;
diff --git ql/src/test/queries/clientpositive/vector_decimal_trailing.q ql/src/test/queries/clientpositive/vector_decimal_trailing.q
index 5f13396..40935aa 100644
--- ql/src/test/queries/clientpositive/vector_decimal_trailing.q
+++ ql/src/test/queries/clientpositive/vector_decimal_trailing.q
@@ -1,6 +1,6 @@
 set hive.mapred.mode=nonstrict;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
 DROP TABLE IF EXISTS DECIMAL_TRAILING_txt;
 DROP TABLE IF EXISTS DECIMAL_TRAILING;
diff --git ql/src/test/queries/clientpositive/vector_decimal_udf2.q ql/src/test/queries/clientpositive/vector_decimal_udf2.q
index 433f464..a013f1f 100644
--- ql/src/test/queries/clientpositive/vector_decimal_udf2.q
+++ ql/src/test/queries/clientpositive/vector_decimal_udf2.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
 DROP TABLE IF EXISTS DECIMAL_UDF2_txt;
 DROP TABLE IF EXISTS DECIMAL_UDF2;
@@ -18,14 +18,14 @@ STORED AS ORC;
 
 INSERT OVERWRITE TABLE DECIMAL_UDF2 SELECT * FROM DECIMAL_UDF2_txt;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key)
 FROM DECIMAL_UDF2 WHERE key = 10;
 
 SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key)
 FROM DECIMAL_UDF2 WHERE key = 10;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
 exp(key), ln(key),
 log(key), log(key, key), log(key, value), log(value, key),
diff --git ql/src/test/queries/clientpositive/vector_distinct_2.q ql/src/test/queries/clientpositive/vector_distinct_2.q
index 509b262..4be23c1 100644
--- ql/src/test/queries/clientpositive/vector_distinct_2.q
+++ ql/src/test/queries/clientpositive/vector_distinct_2.q
@@ -1,6 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
@@ -41,7 +42,7 @@ STORED AS ORC;
 
 INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;
 
-explain
+explain vectorization expression
 select distinct s, t from vectortab2korc;
 
 select distinct s, t from vectortab2korc;
diff --git ql/src/test/queries/clientpositive/vector_elt.q ql/src/test/queries/clientpositive/vector_elt.q
index f44a3be..5e54cbb 100644
--- ql/src/test/queries/clientpositive/vector_elt.q
+++ ql/src/test/queries/clientpositive/vector_elt.q
@@ -1,8 +1,9 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
-EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint)
+EXPLAIN VECTORIZATION EXPRESSION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint)
 FROM alltypesorc
 WHERE ctinyint > 0 LIMIT 10;
@@ -10,7 +11,7 @@ SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cin
 FROM alltypesorc
 WHERE ctinyint > 0 LIMIT 10;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT elt(2, 'abc', 'defg'),
 elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'),
 elt('1', 'abc', 'defg'),
diff --git ql/src/test/queries/clientpositive/vector_empty_where.q ql/src/test/queries/clientpositive/vector_empty_where.q
index 0543a65..3e94c92 100644
--- ql/src/test/queries/clientpositive/vector_empty_where.q
+++ ql/src/test/queries/clientpositive/vector_empty_where.q
@@ -2,22 +2,22 @@ SET hive.vectorized.execution.enabled=true;
 set hive.fetch.task.conversion=none;
 
 -- HIVE-
-explain
+explain vectorization expression
 select count (distinct cint) from alltypesorc where cstring1;
 
 select count (distinct cint) from alltypesorc where cstring1;
 
-explain
+explain vectorization expression
 select count (distinct cint) from alltypesorc where cint;
 
 select count (distinct cint) from alltypesorc where cint;
 
-explain
+explain vectorization expression
 select count (distinct cint) from alltypesorc where cfloat;
 
 select count (distinct cint) from alltypesorc where cfloat;
 
-explain
+explain vectorization expression
 select count (distinct cint) from alltypesorc where ctimestamp1;
 
 select count (distinct cint) from alltypesorc where ctimestamp1;
diff --git ql/src/test/queries/clientpositive/vector_groupby4.q ql/src/test/queries/clientpositive/vector_groupby4.q
index a59d1a8..1906c07 100644
--- ql/src/test/queries/clientpositive/vector_groupby4.q
+++ ql/src/test/queries/clientpositive/vector_groupby4.q
@@ -12,7 +12,7 @@ CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src;
 
 CREATE TABLE dest1(c1 STRING) STORED AS ORC;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 FROM srcorc
 INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1);
diff --git ql/src/test/queries/clientpositive/vector_groupby6.q ql/src/test/queries/clientpositive/vector_groupby6.q
index 89c7a19..cb01882 100644
--- ql/src/test/queries/clientpositive/vector_groupby6.q
+++ ql/src/test/queries/clientpositive/vector_groupby6.q
@@ -12,7 +12,7 @@ CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src;
 
 CREATE TABLE dest1(c1 STRING) STORED AS ORC;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 FROM srcorc
 INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1);
diff --git ql/src/test/queries/clientpositive/vector_groupby_3.q ql/src/test/queries/clientpositive/vector_groupby_3.q
index d42d7f1..299ee92 100644
--- ql/src/test/queries/clientpositive/vector_groupby_3.q
+++ ql/src/test/queries/clientpositive/vector_groupby_3.q
@@ -1,6 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
@@ -41,7 +42,7 @@ STORED AS ORC;
 
 INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;
 
-explain
+explain vectorization expression
 select s, t, max(b) from vectortab2korc group by s, t;
 
 select s, t, max(b) from vectortab2korc group by s, t;
diff --git ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q
index 53df2aa..c692182 100644
--- ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q
+++ ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q
@@ -1,5 +1,5 @@
 set hive.mapred.mode=nonstrict;
-set hive.explain.user=true;
+set hive.explain.user=false;
 SET hive.vectorized.execution.enabled = true;
 set hive.fetch.task.conversion=none;
 SET hive.auto.convert.join=true;
@@ -8,7 +8,7 @@ SET hive.auto.convert.join.noconditionaltask.size=1000000000;
 set hive.exec.dynamic.partition.mode=nonstrict;
 
 -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly.
-explain
+explain vectorization expression
 select *
 from src
 where not key in
diff --git ql/src/test/queries/clientpositive/vector_groupby_reduce.q ql/src/test/queries/clientpositive/vector_groupby_reduce.q
index e78b57f..bafb32e 100644
--- ql/src/test/queries/clientpositive/vector_groupby_reduce.q
+++ ql/src/test/queries/clientpositive/vector_groupby_reduce.q
@@ -1,6 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 create table store_sales_txt
 (
@@ -91,7 +92,7 @@ ss_sold_date_sk
 , ss_net_profit
 from store_sales_txt;
 
-explain
+explain vectorization expression
 select ss_ticket_number
 from
@@ -108,7 +109,7 @@ limit 20;
 
-explain
+explain vectorization expression
 select min(ss_ticket_number) m
 from
@@ -133,7 +134,7 @@ order by m;
 
-explain
+explain vectorization expression
 select ss_ticket_number, sum(ss_item_sk), sum(q)
 from
@@ -157,7 +158,7 @@ group by ss_ticket_number
 order by ss_ticket_number;
 
-explain
+explain vectorization expression
 select ss_ticket_number, ss_item_sk, sum(q)
 from
diff --git ql/src/test/queries/clientpositive/vector_grouping_sets.q ql/src/test/queries/clientpositive/vector_grouping_sets.q
index ec5a3c7..3ea2730 100644
--- ql/src/test/queries/clientpositive/vector_grouping_sets.q
+++ ql/src/test/queries/clientpositive/vector_grouping_sets.q
@@ -45,7 +45,7 @@ create table store
 
 stored as orc as select * from store_txt;
 
-explain
+explain vectorization expression
 select s_store_id
 from store
 group by s_store_id with rollup;
@@ -54,7 +54,7 @@ select s_store_id
 from store
 group by s_store_id with rollup;
 
-explain
+explain vectorization expression
 select s_store_id, GROUPING__ID
 from store
 group by s_store_id with rollup;
diff --git ql/src/test/queries/clientpositive/vector_if_expr.q ql/src/test/queries/clientpositive/vector_if_expr.q
index 475cecf..6e7ccf7 100644
--- ql/src/test/queries/clientpositive/vector_if_expr.q
+++ ql/src/test/queries/clientpositive/vector_if_expr.q
@@ -1,9 +1,9 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1, IF (cboolean1, 'first', 'second') FROM alltypesorc WHERE cboolean1 IS NOT NULL AND cboolean1 ORDER BY cboolean1;
 SELECT cboolean1, IF (cboolean1, 'first', 'second') FROM alltypesorc WHERE cboolean1 IS NOT NULL AND cboolean1 ORDER BY cboolean1 LIMIT 5;
diff --git ql/src/test/queries/clientpositive/vector_include_no_sel.q ql/src/test/queries/clientpositive/vector_include_no_sel.q
index 03f676b..a499ae5 100644
--- ql/src/test/queries/clientpositive/vector_include_no_sel.q
+++ ql/src/test/queries/clientpositive/vector_include_no_sel.q
@@ -6,6 +6,7 @@ SET hive.auto.convert.join=true;
 SET hive.auto.convert.join.noconditionaltask=true;
 SET hive.auto.convert.join.noconditionaltask.size=1000000000;
 SET hive.mapred.mode=nonstrict;
+set hive.fetch.task.conversion=none;
 
 -- HIVE-13872
 -- Looking for TableScan immediately followed by ReduceSink (no intervening SEL operator).
@@ -69,7 +70,7 @@ LOAD DATA LOCAL INPATH '../../data/files/customer_demographics.txt' OVERWRITE IN
 
 create table customer_demographics stored as orc as select * from customer_demographics_txt;
 
-explain
+explain vectorization expression
 select count(1) from customer_demographics,store_sales
 where ((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'M') or
 (customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'U'));
diff --git ql/src/test/queries/clientpositive/vector_inner_join.q ql/src/test/queries/clientpositive/vector_inner_join.q
index 24b66bf..54194a8 100644
--- ql/src/test/queries/clientpositive/vector_inner_join.q
+++ ql/src/test/queries/clientpositive/vector_inner_join.q
@@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.auto.convert.join=true;
+set hive.fetch.task.conversion=none;
 
 CREATE TABLE orc_table_1a(a INT) STORED AS ORC;
 CREATE TABLE orc_table_2a(c INT) STORED AS ORC;
@@ -9,12 +10,12 @@ CREATE TABLE orc_table_2a(c INT) STORED AS ORC;
 insert into table orc_table_1a values(1),(1), (2),(3);
 insert into table orc_table_2a values(0),(2), (3),(null),(4);
 
-explain
+explain vectorization detail
 select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2;
 
 select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2;
 
-explain
+explain vectorization detail
 select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2;
 
 select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2;
@@ -26,38 +27,38 @@ CREATE TABLE orc_table_2b(c INT, v2 STRING) STORED AS ORC;
 
 insert into table orc_table_1b values("one", 1),("one", 1), ("two", 2),("three", 3);
 insert into table orc_table_2b values(0, "ZERO"),(2, "TWO"), (3, "THREE"),(null, ""),(4, "FOUR");
 
-explain
+explain vectorization detail
 select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2;
 
 select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2;
 
-explain
+explain vectorization detail
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2;
 
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2;
 
-explain
+explain vectorization detail
 select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2;
 
 select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2;
 
-explain
+explain vectorization detail
 select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2;
 
 select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2;
 
-explain
+explain vectorization detail
 select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2;
 
 select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2;
 
-explain
+explain vectorization detail
 select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2;
 
 select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2;
 
-explain
+explain vectorization detail
 select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2;
 
 select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2;
diff --git ql/src/test/queries/clientpositive/vector_interval_1.q ql/src/test/queries/clientpositive/vector_interval_1.q
index 8fefe41..f4f0024 100644
--- ql/src/test/queries/clientpositive/vector_interval_1.q
+++ ql/src/test/queries/clientpositive/vector_interval_1.q
@@ -1,8 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
-
 set hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
 drop table if exists vector_interval_1;
 create table vector_interval_1 (ts timestamp, dt date, str1 string, str2 string) stored as orc;
@@ -13,7 +12,7 @@ insert into vector_interval_1
 select null, null, null, null from src limit 1;
 
 -- constants/cast from string
-explain
+explain vectorization expression
 select
 str1,
 interval '1-2' year to month, interval_year_month(str1),
@@ -28,7 +27,7 @@ from vector_interval_1 order by str1;
 
 -- interval arithmetic
-explain
+explain vectorization expression
 select
 dt,
 interval '1-2' year to month + interval '1-2' year to month,
@@ -49,7 +48,7 @@ select
 interval '1-2' year to month - interval_year_month(str1)
 from vector_interval_1 order by dt;
 
-explain
+explain vectorization expression
 select
 dt,
 interval '1 2:3:4' day to second + interval '1 2:3:4' day to second,
@@ -72,7 +71,7 @@ from vector_interval_1 order by dt;
 
 -- date-interval arithmetic
-explain
+explain vectorization expression
 select
 dt,
 dt + interval '1-2' year to month,
@@ -107,7 +106,7 @@ from vector_interval_1 order by dt;
 
 -- timestamp-interval arithmetic
-explain
+explain vectorization expression
 select
 ts,
 ts + interval '1-2' year to month,
@@ -142,7 +141,7 @@ from vector_interval_1 order by ts;
 
 -- timestamp-timestamp arithmetic
-explain
+explain vectorization expression
 select
 ts,
 ts - ts,
@@ -159,7 +158,7 @@ from vector_interval_1 order by ts;
 
 -- date-date arithmetic
-explain
+explain vectorization expression
 select
 dt,
 dt - dt,
@@ -176,7 +175,7 @@ from vector_interval_1 order by dt;
 
 -- date-timestamp arithmetic
-explain
+explain vectorization expression
 select
 dt,
 ts - dt,
diff --git ql/src/test/queries/clientpositive/vector_interval_2.q ql/src/test/queries/clientpositive/vector_interval_2.q
index 5afb511..0b78a4b 100644
--- ql/src/test/queries/clientpositive/vector_interval_2.q
+++ ql/src/test/queries/clientpositive/vector_interval_2.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 set hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
 drop table if exists vector_interval_2;
 create table vector_interval_2 (ts timestamp, dt date, str1 string, str2 string, str3 string, str4 string) stored as orc;
@@ -14,7 +14,7 @@ insert into vector_interval_2
 
 -- interval comparisons in select clause
-explain
+explain vectorization expression
 select
 str1,
 -- Should all be true
@@ -77,7 +77,7 @@ select
 interval '1-2' year to month != interval_year_month(str2)
 from vector_interval_2 order by str1;
 
-explain
+explain vectorization expression
 select
 str1,
 -- Should all be false
@@ -128,7 +128,7 @@ select
 interval '1-2' year to month != interval_year_month(str1)
 from vector_interval_2 order by str1;
 
-explain
+explain vectorization expression
 select
 str3,
 -- Should all be true
@@ -191,7 +191,7 @@ select
 interval '1 2:3:4' day to second != interval_day_time(str4)
 from vector_interval_2 order by str3;
 
-explain
+explain vectorization expression
 select
 str3,
 -- Should all be false
@@ -244,7 +244,7 @@ from vector_interval_2 order by str3;
 
 -- interval expressions in predicates
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
 interval_year_month(str1) = interval_year_month(str1)
@@ -293,7 +293,7 @@ where
 and interval '1-3' year to month > interval_year_month(str1)
 order by ts;
 
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
 interval_day_time(str3) = interval_day_time(str3)
@@ -342,7 +342,7 @@ where
 and interval '1 2:3:5' day to second > interval_day_time(str3)
 order by ts;
 
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
 date '2002-03-01' = dt + interval_year_month(str1)
@@ -381,7 +381,7 @@ where
 and dt != dt + interval '1-2' year to month
 order by ts;
 
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
 timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month
@@ -431,7 +431,7 @@ where
 order by ts;
 
 -- day to second expressions in predicate
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
 timestamp '2001-01-01 01:02:03' = dt + interval '0 1:2:3' day to second
@@ -480,7 +480,7 @@ where
 and ts > dt - interval '0 1:2:4' day to second
 order by ts;
 
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
 timestamp '2001-01-01 01:02:03' = ts + interval '0' day
diff --git ql/src/test/queries/clientpositive/vector_interval_arithmetic.q ql/src/test/queries/clientpositive/vector_interval_arithmetic.q
index 40c4c03..9a551e8 100644
--- ql/src/test/queries/clientpositive/vector_interval_arithmetic.q
+++ ql/src/test/queries/clientpositive/vector_interval_arithmetic.q
@@ -13,7 +13,7 @@ insert overwrite table interval_arithmetic_1
 SET hive.vectorized.execution.enabled=true;
 
 -- interval year-month arithmetic
-explain
+explain vectorization expression
 select
 dateval,
 dateval - interval '2-2' year to month,
@@ -36,7 +36,7 @@ select
 from interval_arithmetic_1
 order by dateval;
 
-explain
+explain vectorization expression
 select
 dateval,
 dateval - date '1999-06-07',
@@ -53,7 +53,7 @@ select
 from interval_arithmetic_1
 order by dateval;
 
-explain
+explain vectorization expression
 select
 tsval,
 tsval - interval '2-2' year to month,
@@ -76,7 +76,7 @@ select
 from interval_arithmetic_1
 order by tsval;
 
-explain
+explain vectorization expression
 select
 interval '2-2' year to month + interval '3-3' year to month,
 interval '2-2' year to month - interval '3-3' year to month
@@ -93,7 +93,7 @@ limit 2;
 
 -- interval day-time arithmetic
-explain
+explain vectorization expression
 select
 dateval,
 dateval - interval '99 11:22:33.123456789' day to second,
@@ -116,7 +116,7 @@ select
 from interval_arithmetic_1
 order by dateval;
 
-explain
+explain vectorization expression
 select
 dateval,
 tsval,
@@ -135,7 +135,7 @@ select
 from interval_arithmetic_1
 order by dateval;
 
-explain
+explain vectorization expression
 select
 tsval,
 tsval - interval '99 11:22:33.123456789' day to second,
@@ -158,7 +158,7 @@ select
 from interval_arithmetic_1
 order by tsval;
 
-explain
+explain vectorization expression
 select
 interval '99 11:22:33.123456789' day to second + interval '10 9:8:7.123456789' day to second,
 interval '99 11:22:33.123456789' day to second - interval '10 9:8:7.123456789' day to second
diff --git ql/src/test/queries/clientpositive/vector_interval_mapjoin.q ql/src/test/queries/clientpositive/vector_interval_mapjoin.q
index 36ccd35..d27e67b 100644
--- ql/src/test/queries/clientpositive/vector_interval_mapjoin.q
+++ ql/src/test/queries/clientpositive/vector_interval_mapjoin.q
@@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.auto.convert.join=true;
+set hive.fetch.task.conversion=none;
 
 create table vectortab_a_1k(
 t tinyint,
@@ -45,7 +46,7 @@ LOAD DATA LOCAL INPATH '../../data/files/vectortab_b_1k' OVERWRITE INTO TABLE ve
 
 CREATE TABLE vectortab_b_1korc STORED AS ORC AS SELECT * FROM vectortab_b_1k;
 
-explain
+explain vectorization expression
 select
 v1.s,
 v2.s,
diff --git ql/src/test/queries/clientpositive/vector_join.q ql/src/test/queries/clientpositive/vector_join.q
index 9238a6e..b086a13 100644
--- ql/src/test/queries/clientpositive/vector_join.q
+++ ql/src/test/queries/clientpositive/vector_join.q
@@ -32,6 +32,7 @@ TBLPROPERTIES (
 set hive.auto.convert.join=false;
 set hive.vectorized.execution.enabled = true;
 set hive.mapred.mode=nonstrict;
+set hive.fetch.task.conversion=none;
 
 SELECT cr.id1 , cr.id2 FROM
diff --git ql/src/test/queries/clientpositive/vector_join30.q ql/src/test/queries/clientpositive/vector_join30.q
index 1467cd3..6557a71 100644
--- ql/src/test/queries/clientpositive/vector_join30.q
+++ ql/src/test/queries/clientpositive/vector_join30.q
@@ -10,7 +10,7 @@ SET hive.auto.convert.join.noconditionaltask.size=1000000000;
 
 CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src;
 
-explain
+explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 JOIN
@@ -25,7 +25,7 @@ JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value));
 
-explain
+explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 LEFT OUTER JOIN
@@ -40,7 +40,7 @@ LEFT OUTER JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value));
 
-explain
+explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 RIGHT OUTER JOIN
@@ -55,7 +55,7 @@ RIGHT OUTER JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value));
 
-explain
+explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 JOIN
@@ -76,7 +76,7 @@ JOIN
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value));
 
-explain
+explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 JOIN
@@ -97,7 +97,7 @@ LEFT OUTER JOIN
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value));
 
-explain
+explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 LEFT OUTER JOIN
@@ -118,7 +118,7 @@ LEFT OUTER JOIN
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value));
 
-explain
+explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 LEFT OUTER JOIN
@@ -139,7 +139,7 @@ RIGHT OUTER JOIN
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value));
 
-explain
+explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 RIGHT OUTER JOIN
diff --git ql/src/test/queries/clientpositive/vector_join_part_col_char.q ql/src/test/queries/clientpositive/vector_join_part_col_char.q
index 5cfce37..e625a64 100644
--- ql/src/test/queries/clientpositive/vector_join_part_col_char.q
+++ ql/src/test/queries/clientpositive/vector_join_part_col_char.q
@@ -4,6 +4,7 @@ set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
 set hive.vectorized.execution.enabled=true;
 set hive.explain.user=true;
 set hive.metastore.fastpath=false;
+set hive.fetch.task.conversion=none;
 
 drop table if exists char_part_tbl1 ;
 drop table if exists char_part_tbl2;
@@ -22,7 +23,7 @@ insert into table char_tbl2 partition(gpa='3') select name, age from studenttab
 show partitions char_tbl1;
 show partitions char_tbl2;
 
-explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa);
+explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa);
 select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa);
 
 set hive.vectorized.execution.enabled=false;
diff --git ql/src/test/queries/clientpositive/vector_left_outer_join.q ql/src/test/queries/clientpositive/vector_left_outer_join.q
index ddf2660..0684f12 100644
--- ql/src/test/queries/clientpositive/vector_left_outer_join.q
+++ ql/src/test/queries/clientpositive/vector_left_outer_join.q
@@ -3,7 +3,9 @@ set hive.explain.user=false;
 set hive.vectorized.execution.enabled=true;
 set hive.auto.convert.join=true;
 set hive.mapjoin.hybridgrace.hashtable=false;
-explain
+set hive.fetch.task.conversion=none;
+
+explain vectorization
 select count(*) from (select c.ctinyint
 from alltypesorc c
 left outer join alltypesorc cd
diff --git ql/src/test/queries/clientpositive/vector_left_outer_join2.q ql/src/test/queries/clientpositive/vector_left_outer_join2.q
index 5da5d50..ccceb36 100644
--- ql/src/test/queries/clientpositive/vector_left_outer_join2.q
+++ ql/src/test/queries/clientpositive/vector_left_outer_join2.q
@@ -20,14 +20,14 @@ INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE;
 
 set hive.vectorized.execution.enabled=false;
 set hive.mapjoin.hybridgrace.hashtable=false;
-explain
+explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
 
 set hive.vectorized.execution.enabled=false;
 set hive.mapjoin.hybridgrace.hashtable=true;
-explain
+explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
@@ -36,7 +36,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out
 
 set hive.vectorized.execution.enabled=true;
 set hive.mapjoin.hybridgrace.hashtable=false;
 SET hive.vectorized.execution.mapjoin.native.enabled=false;
-explain
+explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
@@ -44,7 +44,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out
 
 set hive.vectorized.execution.enabled=true;
 set hive.mapjoin.hybridgrace.hashtable=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=false;
-explain
+explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
@@ -52,7 +52,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out
 
 set hive.vectorized.execution.enabled=true;
 set hive.mapjoin.hybridgrace.hashtable=false;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
-explain
+explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
@@ -60,7 +60,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out
 
 set hive.vectorized.execution.enabled=true;
 set hive.mapjoin.hybridgrace.hashtable=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
-explain
+explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q
index dfb8405..8469a06 100644
--- ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q
+++ ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q
@@ -26,381 +26,502 @@ select * from t4;
 
 set hive.vectorized.execution.enabled=false;
 set hive.mapjoin.hybridgrace.hashtable=false;
 
-explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
+explain vectorization only summary
+
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
 
 select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
 
-explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
+explain vectorization only summary
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
 
 select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
 
-explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
+explain vectorization only summary
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
 
 select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
 
-explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
+explain vectorization only summary
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
 
 select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
 
-explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;
+explain vectorization only summary
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;
 
 select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;
 
-explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;
+explain vectorization only summary
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;
 
 select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;
 
-explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;
+explain vectorization only summary
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;
 
 select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;
 
-explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;
+explain vectorization only summary
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;
 
 select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;
 
-explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;
+explain vectorization only summary
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;
 
 select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;
 
-explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;
+explain vectorization only summary
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;
 
 select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;
 
-explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;
+explain vectorization only summary
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;
 
 select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;
 
-explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;
+explain vectorization only summary
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;
 
 select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;
 
-explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;
+explain vectorization only summary
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;
 
 select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;
 
-explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+explain vectorization only summary
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
 
 select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
 
-explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+explain vectorization only summary
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
 
 select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
 
-explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
+explain vectorization only summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
 
 select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
 
-explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
+explain vectorization only summary
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
 
 select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
 
-explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
+explain vectorization only summary
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
 
 select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
 
-explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
+explain vectorization only summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
 
 select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
 
-explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;
+explain vectorization only summary
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;
 
 select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;
 
 set hive.vectorized.execution.enabled=false;
 set hive.mapjoin.hybridgrace.hashtable=true;
 
-explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
+explain vectorization summary
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
 
 select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
 
-explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
+explain vectorization summary
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
 
 select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
 
-explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
+explain vectorization summary
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
 
 select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
 
-explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
+explain vectorization summary
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
 
 select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
 
"val_10" sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; +explain vectorization summary +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; -explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; +explain vectorization summary +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; +explain vectorization summary +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; +explain vectorization summary +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; -explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; +explain vectorization summary +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; -explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; +explain vectorization summary +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; -explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; +explain vectorization summary +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; +explain vectorization summary +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +explain vectorization summary +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain select a.key from t1 a full 
-explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+explain vectorization summary
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
 
 select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
 
-explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
+explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
 
 select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
 
-explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
+explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
 
 select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
 
-explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
+explain vectorization summary
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
 
 select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
 
-explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
+explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
 
 select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
 
-explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;
+explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;
 
 select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;
 
 set hive.vectorized.execution.enabled=true;
 set hive.mapjoin.hybridgrace.hashtable=false;
 SET hive.vectorized.execution.mapjoin.native.enabled=false;
 
-explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
+explain vectorization only operator
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
 
 select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
 
-explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
+explain vectorization only operator
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
 
 select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
 
-explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
+explain vectorization only operator
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
 
 select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
 
-explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
+explain vectorization only operator
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
 
 select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
 
-explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;
+explain vectorization only operator
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;
 
 select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;
 
-explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;
+explain vectorization only operator
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;
 
 select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;
 
-explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;
+explain vectorization only operator
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;
 
 select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;
 
-explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;
+explain vectorization only operator
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;
 
 select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;
 
-explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;
+explain vectorization only operator
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;
 
 select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;
 
-explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;
+explain vectorization only operator
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;
 
 select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;
 
-explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;
+explain vectorization only operator
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;
 
 select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;
 
-explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;
+explain vectorization only operator
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;
 
 select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;
 
-explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;
+explain vectorization only operator
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;
 
 select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;
 
-explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+explain vectorization only operator
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
 
 select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
 
-explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+explain vectorization only operator
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
 
 select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
 
-explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
+explain vectorization only operator
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
 
 select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
 
-explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
+explain vectorization only operator
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
 
 select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
 
-explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
+explain vectorization only operator
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
 
 select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
 
-explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
+explain vectorization only operator
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
 
 select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
 
-explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;
+explain vectorization only operator
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;
 
 select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;
 
 set hive.vectorized.execution.enabled=true;
 set hive.mapjoin.hybridgrace.hashtable=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=false;
 
-explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
+explain vectorization detail
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
 
 select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
 
-explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
+explain vectorization detail
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
 
 select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
 
-explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
+explain vectorization detail
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
 
 select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
 
-explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
+explain vectorization detail
a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; -explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; +explain vectorization detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; +explain vectorization detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; -explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; +explain vectorization detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; +explain vectorization detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; +explain vectorization detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; -explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; +explain vectorization detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; -explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; +explain vectorization detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; -explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; +explain vectorization detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; +explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left outer 
join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +explain vectorization detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; +explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; +explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +explain vectorization detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; +explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; +explain vectorization detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=false; SET hive.vectorized.execution.mapjoin.native.enabled=true; -explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; +explain vectorization detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; -explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; +explain vectorization detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; -explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; +explain vectorization detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; -explain select a.value from t1 a left semi join 
t3 b on (b.key = a.key and b.key < '15') sort by a.value; +explain vectorization detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; -explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; +explain vectorization detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; +explain vectorization detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; -explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; +explain vectorization detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; +explain vectorization detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; +explain vectorization detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; -explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; +explain vectorization detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; -explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; +explain vectorization detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; -explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; +explain vectorization detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; +explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b 
on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +explain vectorization detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; +explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; +explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +explain vectorization detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; +explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; +explain vectorization detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=true; SET hive.vectorized.execution.mapjoin.native.enabled=true; -explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; +explain vectorization detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; -explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; +explain vectorization detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; -explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; +explain vectorization detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; select * from 
t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; -explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; +explain vectorization detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; -explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; +explain vectorization detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; +explain vectorization detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; -explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; +explain vectorization detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; +explain vectorization detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; +explain vectorization detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; -explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; +explain vectorization detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; -explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; +explain vectorization detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; -explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; +explain vectorization detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; +explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key 
left semi join t2 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +explain vectorization detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; +explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; +explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +explain vectorization detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; +explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; +explain vectorization detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; diff --git ql/src/test/queries/clientpositive/vector_mapjoin_reduce.q ql/src/test/queries/clientpositive/vector_mapjoin_reduce.q index c9e9e48..d960559 100644 --- ql/src/test/queries/clientpositive/vector_mapjoin_reduce.q +++ ql/src/test/queries/clientpositive/vector_mapjoin_reduce.q @@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; SET hive.auto.convert.join=true; +set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS @@ -9,7 +10,7 @@ SET hive.auto.convert.join=true; -- Query copied from subquery_in.q -- non agg, non corr, with join in Parent Query -explain +explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on 
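-- Note (not part of the patch): the hunks above swap plain `explain` for the new
-- EXPLAIN VECTORIZATION forms. As a hedged sketch of the grammar these tests exercise
-- (treat the exact keyword list as an assumption): ONLY suppresses most of the ordinary
-- plan output, and a trailing level such as OPERATOR, EXPRESSION, or DETAIL selects
-- progressively more vectorization information.
explain vectorization only operator
select a.key from t1 a left semi join t2 b on a.key = b.key sort by a.key;
explain vectorization detail
select * from t1 a left semi join t2 b on a.key = b.key sort by a.key, a.value;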
diff --git ql/src/test/queries/clientpositive/vector_mapjoin_reduce.q ql/src/test/queries/clientpositive/vector_mapjoin_reduce.q
index c9e9e48..d960559 100644
--- ql/src/test/queries/clientpositive/vector_mapjoin_reduce.q
+++ ql/src/test/queries/clientpositive/vector_mapjoin_reduce.q
@@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.auto.convert.join=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
@@ -9,7 +10,7 @@ SET hive.auto.convert.join=true;
 -- Query copied from subquery_in.q
 -- non agg, non corr, with join in Parent Query
-explain
+explain vectorization expression
 select p.p_partkey, li.l_suppkey
 from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
 where li.l_linenumber = 1 and
@@ -23,7 +24,7 @@ where li.l_linenumber = 1 and
 ;
 -- non agg, corr, with join in Parent Query
-explain
+explain vectorization expression
 select p.p_partkey, li.l_suppkey
 from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
 where li.l_linenumber = 1 and
diff --git ql/src/test/queries/clientpositive/vector_mr_diff_schema_alias.q ql/src/test/queries/clientpositive/vector_mr_diff_schema_alias.q
index 1f17669..22830a5 100644
--- ql/src/test/queries/clientpositive/vector_mr_diff_schema_alias.q
+++ ql/src/test/queries/clientpositive/vector_mr_diff_schema_alias.q
@@ -1,6 +1,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 create table date_dim
 (
@@ -104,7 +105,7 @@ stored as orc;
 -- For MR, we are verifying this query DOES NOT vectorize the Map vertex with
 -- the 2 TableScanOperators that have different schema.
-explain select
+explain vectorization select
 s_state, count(1)
 from store_sales, store,
diff --git ql/src/test/queries/clientpositive/vector_multi_insert.q ql/src/test/queries/clientpositive/vector_multi_insert.q
index 374a0da..c56ee1c 100644
--- ql/src/test/queries/clientpositive/vector_multi_insert.q
+++ ql/src/test/queries/clientpositive/vector_multi_insert.q
@@ -1,6 +1,6 @@ set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 create table orc1
 stored as orc
@@ -22,7 +22,7 @@ create table orc_rn3 (rn int);
 analyze table orc1 compute statistics;
-explain from orc1 a
+explain vectorization from orc1 a
 insert overwrite table orc_rn1 select a.* where a.rn < 100
 insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
 insert overwrite table orc_rn3 select a.* where a.rn >= 1000;
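-- Note (not part of the patch): several files here also move hive.fetch.task.conversion
-- from minimal to none. A plausible reading, offered as an assumption rather than a fact
-- stated by the patch: with fetch conversion enabled, a simple query can be answered by a
-- bare fetch task that never goes through the vectorized execution path, so the tests
-- force none to guarantee the vectorizer is actually exercised.
set hive.fetch.task.conversion=none;
explain vectorization
select rn from orc1 limit 10;  -- with conversion enabled this could run as a plain fetch task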
diff --git ql/src/test/queries/clientpositive/vector_non_constant_in_expr.q ql/src/test/queries/clientpositive/vector_non_constant_in_expr.q
index 69142bf..113ea7f 100644
--- ql/src/test/queries/clientpositive/vector_non_constant_in_expr.q
+++ ql/src/test/queries/clientpositive/vector_non_constant_in_expr.q
@@ -1,4 +1,4 @@ SET hive.vectorized.execution.enabled=true;
 set hive.fetch.task.conversion=none;
-explain SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint);
\ No newline at end of file
+explain vectorization SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint);
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/vector_non_string_partition.q ql/src/test/queries/clientpositive/vector_non_string_partition.q
index bffc93e..cf3e765 100644
--- ql/src/test/queries/clientpositive/vector_non_string_partition.q
+++ ql/src/test/queries/clientpositive/vector_non_string_partition.q
@@ -4,16 +4,17 @@ SET hive.vectorized.execution.enabled=true;
 CREATE TABLE non_string_part(cint INT, cstring1 STRING, cdouble DOUBLE, ctimestamp1 TIMESTAMP) PARTITIONED BY (ctinyint tinyint) STORED AS ORC;
 SET hive.exec.dynamic.partition.mode=nonstrict;
 SET hive.exec.dynamic.partition=true;
+set hive.fetch.task.conversion=none;
 INSERT OVERWRITE TABLE non_string_part PARTITION(ctinyint) SELECT cint, cstring1, cdouble, ctimestamp1, ctinyint fROM alltypesorc
 WHERE ctinyint IS NULL AND cdouble IS NOT NULL ORDER BY cdouble;
 SHOW PARTITIONS non_string_part;
-EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10;
+EXPLAIN VECTORIZATION EXPRESSION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10;
 SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10;
-EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10;
+EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10;
 SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10;
diff --git ql/src/test/queries/clientpositive/vector_null_projection.q ql/src/test/queries/clientpositive/vector_null_projection.q
index 66c0838..711b8e7 100644
--- ql/src/test/queries/clientpositive/vector_null_projection.q
+++ ql/src/test/queries/clientpositive/vector_null_projection.q
@@ -10,12 +10,12 @@ insert into table a values('aaa');
 insert into table b values('aaa');
 -- We expect no vectorization due to NULL (void) projection type.
-explain
+explain vectorization expression
 select NULL from a;
 select NULL from a;
-explain
+explain vectorization expression
 select NULL as x from a union distinct select NULL as x from b;
 select NULL as x from a union distinct select NULL as x from b;
\ No newline at end of file
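-- Note (not part of the patch): vector_null_projection.q documents that a bare NULL
-- projection has void type, which the vectorizer rejects. As an illustration, and an
-- assumption rather than anything the patch asserts, casting the literal gives the
-- column a concrete type, which may let the same shape of query vectorize.
explain vectorization expression
select cast(NULL as int) from a;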
diff --git ql/src/test/queries/clientpositive/vector_nullsafe_join.q ql/src/test/queries/clientpositive/vector_nullsafe_join.q
index b316a54..6a7ff72 100644
--- ql/src/test/queries/clientpositive/vector_nullsafe_join.q
+++ ql/src/test/queries/clientpositive/vector_nullsafe_join.q
@@ -1,6 +1,7 @@ set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.auto.convert.join=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
 --
@@ -20,19 +21,19 @@ CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt;
 SET hive.vectorized.execution.mapjoin.native.enabled=false;
 -- merging
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value;
+explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value;
 select * from myinput1 a join myinput1 b on a.key<=>b.value;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key;
+explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key;
 select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key;
+explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key;
 select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value;
+explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value;
 select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value;
+explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value;
 select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value;
 -- outer joins
@@ -47,19 +48,19 @@ SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key<=>b.value;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
 -- merging
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value;
+explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value;
 select * from myinput1 a join myinput1 b on a.key<=>b.value;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key;
+explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key;
 select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key;
+explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key;
 select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value;
+explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value;
 select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value;
+explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value;
 select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value;
 -- outer joins
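-- Note (not part of the patch): the file above exercises <=>, Hive's null-safe
-- equality. Unlike =, it evaluates to true when both operands are NULL, so NULL
-- keys can participate in the join instead of being filtered out.
select * from myinput1 a join myinput1 b on a.key <=> b.value;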
diff --git ql/src/test/queries/clientpositive/vector_number_compare_projection.q ql/src/test/queries/clientpositive/vector_number_compare_projection.q
index feb5e98..3f4f5aa 100644
--- ql/src/test/queries/clientpositive/vector_number_compare_projection.q
+++ ql/src/test/queries/clientpositive/vector_number_compare_projection.q
@@ -35,7 +35,7 @@ SET hive.vectorized.execution.enabled=true;
 --
 -- Projection LongColLongScalar
 --
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT sum(hash(*)) FROM
 (SELECT t, si, i, (t < 0) as compare1, (si <= 0) as compare2, (i = 0) as compare3 from vectortab2k_orc
 order by t, si, i) as q;
@@ -44,7 +44,7 @@ SELECT sum(hash(*)) FROM
 (SELECT t, si, i, (t < 0) as compare1, (si <= 0) as compare2, (i = 0) as compare3 from vectortab2k_orc
 order by t, si, i) as q;
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT sum(hash(*)) FROM
 (SELECT t, si, i, b, (t > 0) as compare1, (si >= 0) as compare2, (i != 0) as compare3, (b > 0) as compare4 from vectortab2k_orc
 order by t, si, i, b) as q;
diff --git ql/src/test/queries/clientpositive/vector_nvl.q ql/src/test/queries/clientpositive/vector_nvl.q
index 742bf52..e00e82f 100644
--- ql/src/test/queries/clientpositive/vector_nvl.q
+++ ql/src/test/queries/clientpositive/vector_nvl.q
@@ -1,7 +1,8 @@ SET hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
-EXPLAIN SELECT cdouble, nvl(cdouble, 100) as n
+EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, nvl(cdouble, 100) as n
 FROM alltypesorc
 WHERE (cdouble IS NULL)
 LIMIT 10;
@@ -11,7 +12,7 @@ FROM alltypesorc
 WHERE (cdouble IS NULL)
 LIMIT 10;
-EXPLAIN SELECT cfloat, nvl(cfloat, 1) as n
+EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, nvl(cfloat, 1) as n
 FROM alltypesorc
 LIMIT 10;
@@ -19,7 +20,7 @@ SELECT cfloat, nvl(cfloat, 1) as n
 FROM alltypesorc
 LIMIT 10;
-EXPLAIN SELECT nvl(null, 10) as n
+EXPLAIN VECTORIZATION EXPRESSION SELECT nvl(null, 10) as n
 FROM alltypesorc
 LIMIT 10;
@@ -27,7 +28,7 @@ SELECT nvl(null, 10) as n
 FROM alltypesorc
 LIMIT 10;
-EXPLAIN SELECT nvl(null, null) as n
+EXPLAIN VECTORIZATION EXPRESSION SELECT nvl(null, null) as n
 FROM alltypesorc
 LIMIT 10;
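-- Note (not part of the patch): vector_nvl.q targets nvl(x, d), which returns d when
-- x is NULL and x otherwise. The first query in that file therefore yields 100 for
-- every selected row, since its WHERE clause keeps only NULL cdouble values.
select cdouble, nvl(cdouble, 100) as n from alltypesorc where cdouble is null limit 10;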
diff --git ql/src/test/queries/clientpositive/vector_orderby_5.q ql/src/test/queries/clientpositive/vector_orderby_5.q
index 30bcaef..17ccf82 100644
--- ql/src/test/queries/clientpositive/vector_orderby_5.q
+++ ql/src/test/queries/clientpositive/vector_orderby_5.q
@@ -1,6 +1,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 create table vectortab2k(
 t tinyint,
@@ -39,7 +40,7 @@ STORED AS ORC;
 INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;
-explain
+explain vectorization expression
 select bo, max(b) from vectortab2korc group by bo order by bo desc;
 select bo, max(b) from vectortab2korc group by bo order by bo desc;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/vector_outer_join0.q ql/src/test/queries/clientpositive/vector_outer_join0.q
index dce3a1b..d7586c7 100644
--- ql/src/test/queries/clientpositive/vector_outer_join0.q
+++ ql/src/test/queries/clientpositive/vector_outer_join0.q
@@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.auto.convert.join=true;
+set hive.fetch.task.conversion=none;
 CREATE TABLE orc_table_1(v1 STRING, a INT) STORED AS ORC;
 CREATE TABLE orc_table_2(c INT, v2 STRING) STORED AS ORC;
@@ -12,14 +13,14 @@ insert into table orc_table_2 values (0, "ZERO"),(2, "TWO"), (3, "THREE"),(null,
 select * from orc_table_1;
 select * from orc_table_2;
-explain
+explain vectorization detail
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c;
 -- SORT_QUERY_RESULTS
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c;
-explain
+explain vectorization detail
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c;
 -- SORT_QUERY_RESULTS
diff --git ql/src/test/queries/clientpositive/vector_outer_join1.q ql/src/test/queries/clientpositive/vector_outer_join1.q
index 4a36452..6cb2e45 100644
--- ql/src/test/queries/clientpositive/vector_outer_join1.q
+++ ql/src/test/queries/clientpositive/vector_outer_join1.q
@@ -2,6 +2,7 @@ set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.auto.convert.join=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.fetch.task.conversion=none;
 -- Using cint and ctinyint in test queries
 create table small_alltypesorc1a as select * from alltypesorc where cint is not null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5;
@@ -28,7 +29,7 @@ ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS;
 select * from small_alltypesorc_a;
-explain
+explain vectorization detail
 select *
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
@@ -41,7 +42,7 @@ from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
 on cd.cint = c.cint;
-explain
+explain vectorization detail
 select c.ctinyint
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a hd
@@ -54,7 +55,7 @@ from small_alltypesorc_a c
 left outer join small_alltypesorc_a hd
 on hd.ctinyint = c.ctinyint;
-explain
+explain vectorization detail
 select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
diff --git ql/src/test/queries/clientpositive/vector_outer_join2.q ql/src/test/queries/clientpositive/vector_outer_join2.q
index d3b5805..da17806 100644
--- ql/src/test/queries/clientpositive/vector_outer_join2.q
+++ ql/src/test/queries/clientpositive/vector_outer_join2.q
@@ -2,6 +2,7 @@ set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.auto.convert.join=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.fetch.task.conversion=none;
 -- Using cint and cbigint in test queries
 create table small_alltypesorc1a as select * from alltypesorc where cint is not null and cbigint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5;
@@ -28,7 +29,7 @@ ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS;
 select * from small_alltypesorc_a;
-explain
+explain vectorization detail
 select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
diff --git ql/src/test/queries/clientpositive/vector_outer_join3.q ql/src/test/queries/clientpositive/vector_outer_join3.q
index e5fc0a9..3f28251 100644
--- ql/src/test/queries/clientpositive/vector_outer_join3.q
+++ ql/src/test/queries/clientpositive/vector_outer_join3.q
@@ -2,6 +2,7 @@ set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.auto.convert.join=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.fetch.task.conversion=none;
 -- Using cint and cstring1 in test queries
 create table small_alltypesorc1a as select * from alltypesorc where cint is not null and cstring1 is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5;
@@ -27,7 +28,7 @@ ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS;
 ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS;
 select * from small_alltypesorc_a;
-explain
+explain vectorization detail formatted
 select count(*) from (select c.cstring1
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
@@ -47,7 +48,7 @@ left outer join small_alltypesorc_a hd
 on hd.cstring1 = c.cstring1
 ) t1;
-explain
+explain vectorization detail formatted
 select count(*) from (select c.cstring1
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
@@ -67,7 +68,7 @@ left outer join small_alltypesorc_a hd
 on hd.cstring1 = c.cstring1
 ) t1;
-explain
+explain vectorization detail formatted
 select count(*) from (select c.cstring1
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
diff --git ql/src/test/queries/clientpositive/vector_outer_join4.q ql/src/test/queries/clientpositive/vector_outer_join4.q
index 45461b5..d024687 100644
--- ql/src/test/queries/clientpositive/vector_outer_join4.q
+++ ql/src/test/queries/clientpositive/vector_outer_join4.q
@@ -2,6 +2,7 @@ set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.auto.convert.join=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.fetch.task.conversion=none;
 -- Using cint and ctinyint in test queries
 create table small_alltypesorc1b as select * from alltypesorc where cint is not null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 10;
@@ -28,7 +29,7 @@ ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS;
 select * from small_alltypesorc_b;
-explain
+explain vectorization detail formatted
 select *
 from small_alltypesorc_b c
 left outer join small_alltypesorc_b cd
@@ -41,7 +42,7 @@ from small_alltypesorc_b c
 left outer join small_alltypesorc_b cd
 on cd.cint = c.cint;
-explain
+explain vectorization detail formatted
 select c.ctinyint
 from small_alltypesorc_b c
 left outer join small_alltypesorc_b hd
@@ -54,7 +55,7 @@ from small_alltypesorc_b c
 left outer join small_alltypesorc_b hd
 on hd.ctinyint = c.ctinyint;
-explain
+explain vectorization detail formatted
 select count(*) from (select c.ctinyint
 from small_alltypesorc_b c
 left outer join small_alltypesorc_b cd
diff --git ql/src/test/queries/clientpositive/vector_outer_join5.q ql/src/test/queries/clientpositive/vector_outer_join5.q
index 18b9ab4..b8e788a 100644
--- ql/src/test/queries/clientpositive/vector_outer_join5.q
+++ ql/src/test/queries/clientpositive/vector_outer_join5.q
@@ -5,6 +5,7 @@ SET hive.vectorized.execution.mapjoin.native.enabled=true;
 set hive.auto.convert.join=true;
 set hive.auto.convert.join.noconditionaltask=true;
 set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
@@ -22,7 +23,7 @@ as orc as select ctinyint, cbigint from alltypesorc limit 100;
 ANALYZE TABLE small_table COMPUTE STATISTICS;
 ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS;
-explain
+explain vectorization detail formatted
 select count(*) from (select s.*, st.*
 from sorted_mod_4 s
 left outer join small_table st
@@ -35,7 +36,7 @@ left outer join small_table st
 on s.ctinyint = st.ctinyint
 ) t1;
-explain
+explain vectorization detail formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
@@ -48,7 +49,7 @@ left outer join small_table sm
 on s.ctinyint = sm.ctinyint and s.cmodint = 2
 ) t1;
-explain
+explain vectorization detail formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
@@ -61,7 +62,7 @@ left outer join small_table sm
 on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
 ) t1;
-explain
+explain vectorization detail formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
@@ -74,7 +75,7 @@ left outer join small_table sm
 on s.ctinyint = sm.ctinyint and s.ctinyint < 100
 ) t1;
-explain
+explain vectorization detail formatted
 select count(*) from (select s.*, sm.*, s2.*
 from sorted_mod_4 s
 left outer join small_table sm
@@ -105,7 +106,7 @@ as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc lim
 ANALYZE TABLE small_table2 COMPUTE STATISTICS;
 ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS;
-explain
+explain vectorization detail formatted
 select count(*) from (select s.*, st.*
 from mod_8_mod_4 s
 left outer join small_table2 st
@@ -118,7 +119,7 @@ left outer join small_table2 st
 on s.cmodtinyint = st.cmodtinyint
 ) t1;
-explain
+explain vectorization detail formatted
 select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
 from mod_8_mod_4 s
 left outer join small_table2 sm
@@ -131,7 +132,7 @@ left outer join small_table2 sm
 on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
 ) t1;
-explain
+explain vectorization detail formatted
 select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
 from mod_8_mod_4 s
 left outer join small_table2 sm
@@ -144,7 +145,7 @@ left outer join small_table2 sm
 on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
 ) t1;
-explain
+explain vectorization detail formatted
 select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
 from mod_8_mod_4 s
 left outer join small_table2 sm
@@ -157,7 +158,7 @@ left outer join small_table2 sm
 on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
 ) t1;
-explain
+explain vectorization detail formatted
 select count(*) from (select s.*, sm.*, s2.*
 from mod_8_mod_4 s
 left outer join small_table2 sm
diff --git ql/src/test/queries/clientpositive/vector_outer_join6.q ql/src/test/queries/clientpositive/vector_outer_join6.q
index 06fa385..b39e8ed 100644
--- ql/src/test/queries/clientpositive/vector_outer_join6.q
+++ ql/src/test/queries/clientpositive/vector_outer_join6.q
@@ -3,6 +3,7 @@ set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
 SET hive.auto.convert.join=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
@@ -28,14 +29,14 @@ create table TJOIN2 stored as orc AS SELECT * FROM TJOIN2_txt;
 create table TJOIN3 stored as orc AS SELECT * FROM TJOIN3_txt;
 create table TJOIN4 stored as orc AS SELECT * FROM TJOIN4_txt;
-explain
+explain vectorization detail formatted
 select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from
 (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1;
 select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from
 (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1;
-explain
+explain vectorization detail formatted
 select tj1rnum, tj2rnum as rnumt3 from
 (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1;
diff --git ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
index f25374d..b825fb3 100644
--- ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
+++ ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
@@ -1,7 +1,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 create table inventory_txt
 (
@@ -27,7 +27,7 @@ partitioned by (par string) stored as orc;
 insert into table inventory_part_0 partition(par='1') select * from inventory_txt;
 insert into table inventory_part_0 partition(par='2') select * from inventory_txt;
-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_0;
 select sum(inv_quantity_on_hand) from inventory_part_0;
@@ -47,7 +47,7 @@ alter table inventory_part_1 add columns (fifthcol string);
 insert into table inventory_part_1 partition(par='5cols') select *, '5th' as fifthcol from inventory_txt;
-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_1;
 select sum(inv_quantity_on_hand) from inventory_part_1;
@@ -66,7 +66,7 @@ insert into table inventory_part_2a partition(par='1') select * from inventory_t
 insert into table inventory_part_2a partition(par='2') select * from inventory_txt;
 alter table inventory_part_2a partition (par='2') change inv_item_sk other_name int;
-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_2a;
 create table inventory_part_2b(
@@ -80,7 +80,7 @@ insert into table inventory_part_2b partition(par1='1',par2=4) select * from inv
 insert into table inventory_part_2b partition(par1='2',par2=3) select * from inventory_txt;
 alter table inventory_part_2b partition (par1='2',par2=3) change inv_quantity_on_hand other_name int;
-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_2b;
 -- Verify we do not vectorize when a partition column type is different.
@@ -97,5 +97,5 @@ insert into table inventory_part_3 partition(par='1') select * from inventory_tx
 insert into table inventory_part_3 partition(par='2') select * from inventory_txt;
 alter table inventory_part_3 partition (par='2') change inv_warehouse_sk inv_warehouse_sk bigint;
-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_3;
\ No newline at end of file
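-- Note (not part of the patch): vector_partition_diff_num_cols.q checks schema
-- evolution across partitions. Added or renamed columns should still vectorize,
-- while a changed column type should not; the file's last hunk is that negative
-- case, restated here using its own statements.
alter table inventory_part_3 partition (par='2') change inv_warehouse_sk inv_warehouse_sk bigint;
explain vectorization expression
select sum(inv_quantity_on_hand) from inventory_part_3;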
diff --git ql/src/test/queries/clientpositive/vector_partitioned_date_time.q ql/src/test/queries/clientpositive/vector_partitioned_date_time.q
index f53d8c0..ee22c01 100644
--- ql/src/test/queries/clientpositive/vector_partitioned_date_time.q
+++ ql/src/test/queries/clientpositive/vector_partitioned_date_time.q
@@ -1,6 +1,6 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 -- Exclude test on Windows due to space character being escaped in Hive paths on Windows.
 -- EXCLUDE_OS_WINDOWS
@@ -32,12 +32,12 @@ select fl_date, count(*) from flights_tiny_orc group by fl_date;
 SET hive.vectorized.execution.enabled=true;
-explain
+explain vectorization expression
 select * from flights_tiny_orc sort by fl_num, fl_date limit 25;
 select * from flights_tiny_orc sort by fl_num, fl_date limit 25;
-explain
+explain vectorization expression
 select fl_date, count(*) from flights_tiny_orc group by fl_date;
 select fl_date, count(*) from flights_tiny_orc group by fl_date;
@@ -71,17 +71,17 @@ select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date
 SET hive.vectorized.execution.enabled=true;
-explain
+explain vectorization expression
 select * from flights_tiny_orc_partitioned_date;
 select * from flights_tiny_orc_partitioned_date;
-explain
+explain vectorization expression
 select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25;
 select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25;
-explain
+explain vectorization expression
 select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date;
 select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date;
@@ -115,17 +115,17 @@ select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl
 SET hive.vectorized.execution.enabled=true;
-explain
+explain vectorization expression
 select * from flights_tiny_orc_partitioned_timestamp;
 select * from flights_tiny_orc_partitioned_timestamp;
-explain
+explain vectorization expression
 select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25;
 select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25;
-explain
+explain vectorization expression
 select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time;
 select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time;
diff --git ql/src/test/queries/clientpositive/vector_partitioned_date_time_win.q ql/src/test/queries/clientpositive/vector_partitioned_date_time_win.q
index c157df1..bd4931c 100644
--- ql/src/test/queries/clientpositive/vector_partitioned_date_time_win.q
+++ ql/src/test/queries/clientpositive/vector_partitioned_date_time_win.q
@@ -30,12 +30,12 @@ select fl_date, count(*) from flights_tiny_orc group by fl_date;
 SET hive.vectorized.execution.enabled=true;
-explain
+explain vectorization expression
 select * from flights_tiny_orc sort by fl_num, fl_date limit 25;
 select * from flights_tiny_orc sort by fl_num, fl_date limit 25;
-explain
+explain vectorization expression
 select fl_date, count(*) from flights_tiny_orc group by fl_date;
 select fl_date, count(*) from flights_tiny_orc group by fl_date;
@@ -69,17 +69,17 @@ select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date
 SET hive.vectorized.execution.enabled=true;
-explain
+explain vectorization expression
 select * from flights_tiny_orc_partitioned_date;
 select * from flights_tiny_orc_partitioned_date;
-explain
+explain vectorization expression
 select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25;
 select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25;
-explain
+explain vectorization expression
 select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date;
 select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date;
@@ -113,17 +113,17 @@ select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl
 SET hive.vectorized.execution.enabled=true;
-explain
+explain vectorization expression
 select * from flights_tiny_orc_partitioned_timestamp;
 select * from flights_tiny_orc_partitioned_timestamp;
-explain
+explain vectorization expression
 select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25;
 select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25;
-explain
+explain vectorization expression
 select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time;
 select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time;
diff --git ql/src/test/queries/clientpositive/vector_reduce1.q ql/src/test/queries/clientpositive/vector_reduce1.q
index cfd803f..ce90491 100644
--- ql/src/test/queries/clientpositive/vector_reduce1.q
+++ ql/src/test/queries/clientpositive/vector_reduce1.q
@@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.vectorized.execution.reducesink.new.enabled=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
@@ -42,7 +43,7 @@ STORED AS ORC;
 INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;
-explain
+explain vectorization expression
 select b from vectortab2korc order by b;
 select b from vectortab2korc order by b;
diff --git ql/src/test/queries/clientpositive/vector_reduce2.q ql/src/test/queries/clientpositive/vector_reduce2.q
index ab67132..80ad196 100644
--- ql/src/test/queries/clientpositive/vector_reduce2.q
+++ ql/src/test/queries/clientpositive/vector_reduce2.q
@@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.vectorized.execution.reducesink.new.enabled=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
@@ -42,7 +43,7 @@ STORED AS ORC;
 INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;
-explain
+explain vectorization expression
 select s, i, s2 from vectortab2korc order by s, i, s2;
 select s, i, s2 from vectortab2korc order by s, i, s2;
diff --git ql/src/test/queries/clientpositive/vector_reduce3.q ql/src/test/queries/clientpositive/vector_reduce3.q
index bf8206f..e01ed26 100644
--- ql/src/test/queries/clientpositive/vector_reduce3.q
+++ ql/src/test/queries/clientpositive/vector_reduce3.q
@@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.vectorized.execution.reducesink.new.enabled=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
@@ -42,7 +43,7 @@ STORED AS ORC;
 INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;
-explain
+explain vectorization expression
 select s from vectortab2korc order by s;
 select s from vectortab2korc order by s;
diff --git ql/src/test/queries/clientpositive/vector_reduce_groupby_decimal.q ql/src/test/queries/clientpositive/vector_reduce_groupby_decimal.q
index 4a50150..bbd25ae 100644
--- ql/src/test/queries/clientpositive/vector_reduce_groupby_decimal.q
+++ ql/src/test/queries/clientpositive/vector_reduce_groupby_decimal.q
@@ -1,10 +1,12 @@ set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+
 CREATE TABLE decimal_test STORED AS ORC AS SELECT cint, cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2 FROM alltypesorc WHERE cint is not null and cdouble is not null;
 SET hive.vectorized.execution.enabled=true;
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT cint, cdouble, cdecimal1, cdecimal2, min(cdecimal1) as min_decimal1 FROM decimal_test
 WHERE cdecimal1 is not null and cdecimal2 is not null
 GROUP BY cint, cdouble, cdecimal1, cdecimal2
diff --git ql/src/test/queries/clientpositive/vector_string_concat.q ql/src/test/queries/clientpositive/vector_string_concat.q
index f3a5965..b03c2a4 100644
--- ql/src/test/queries/clientpositive/vector_string_concat.q
+++ ql/src/test/queries/clientpositive/vector_string_concat.q
@@ -1,6 +1,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 DROP TABLE over1k;
 DROP TABLE over1korc;
@@ -37,7 +38,7 @@ STORED AS ORC;
 INSERT INTO TABLE over1korc SELECT * FROM over1k;
-EXPLAIN SELECT s AS `string`,
+EXPLAIN VECTORIZATION EXPRESSION SELECT s AS `string`,
 CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`,
 CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS `none_z_rtrim_str`
 FROM over1korc LIMIT 20;
@@ -86,7 +87,7 @@ STORED AS ORC;
 INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field`
 FROM vectortab2korc
 GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING))
diff --git ql/src/test/queries/clientpositive/vector_string_decimal.q ql/src/test/queries/clientpositive/vector_string_decimal.q
index e69cd77..186e339 100644
--- ql/src/test/queries/clientpositive/vector_string_decimal.q
+++ ql/src/test/queries/clientpositive/vector_string_decimal.q
@@ -13,7 +13,7 @@ insert overwrite table orc_decimal select id from staging;
 set hive.vectorized.execution.enabled=true;
-explain
+explain vectorization expression
 select * from orc_decimal where id in ('100000000', '200000000');
 select * from orc_decimal where id in ('100000000', '200000000');
diff --git ql/src/test/queries/clientpositive/vector_struct_in.q ql/src/test/queries/clientpositive/vector_struct_in.q
index 50487db..207be37 100644
--- ql/src/test/queries/clientpositive/vector_struct_in.q
+++ ql/src/test/queries/clientpositive/vector_struct_in.q
@@ -1,8 +1,9 @@ set hive.cbo.enable=false;
-set hive.explain.user=true;
+set hive.explain.user=false;
 set hive.tez.dynamic.partition.pruning=false;
 set hive.vectorized.execution.enabled=true;
 SET hive.auto.convert.join=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
@@ -11,7 +12,7 @@ create table test_1 (`id` string, `lineid` string) stored as orc;
 insert into table test_1 values ('one','1'), ('seven','1');
-explain
+explain vectorization expression
 select * from test_1 where struct(`id`, `lineid`)
 IN (
 struct('two','3'),
@@ -38,7 +39,7 @@ struct('nine','1'),
 struct('ten','1')
 );
-explain
+explain vectorization expression
 select `id`, `lineid`, struct(`id`, `lineid`)
 IN (
 struct('two','3'),
@@ -71,7 +72,7 @@ create table test_2 (`id` int, `lineid` int) stored as orc;
 insert into table test_2 values (1,1), (7,1);
-explain
+explain vectorization expression
 select * from test_2 where struct(`id`, `lineid`)
 IN (
 struct(2,3),
@@ -98,7 +99,7 @@ struct(9,1),
 struct(10,1)
 );
-explain
+explain vectorization expression
 select `id`, `lineid`, struct(`id`, `lineid`)
 IN (
 struct(2,3),
@@ -130,7 +131,7 @@ create table test_3 (`id` string, `lineid` int) stored as orc;
 insert into table test_3 values ('one',1), ('seven',1);
-explain
+explain vectorization expression
 select * from test_3 where struct(`id`, `lineid`)
 IN (
 struct('two',3),
@@ -157,7 +158,7 @@ struct('nine',1),
 struct('ten',1)
 );
-explain
+explain vectorization expression
 select `id`, `lineid`, struct(`id`, `lineid`)
 IN (
 struct('two',3),
@@ -189,7 +190,7 @@ create table test_4 (`my_bigint` bigint, `my_string` string, `my_double` double)
 insert into table test_4 values (1, "b", 1.5), (1, "a", 0.5), (2, "b", 1.5);
-explain
+explain vectorization expression
 select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
 IN (
 struct(1L, "a", 1.5D),
@@ -218,7 +219,7 @@ struct(1L, "a", 0.5D),
 struct(3L, "b", 1.5D)
 );
-explain
+explain vectorization expression
 select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
 IN (
 struct(1L, "a", 1.5D),
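-- Note (not part of the patch): vector_struct_in.q covers tuple membership tests.
-- struct(a, b) IN (struct(...), ...) compares whole tuples of values at once, both
-- as a filter and as a projected boolean. Using the file's own data, test_1 holds
-- ('one','1') and ('seven','1'), so both rows satisfy this filter.
select * from test_1 where struct(`id`, `lineid`) IN (struct('one','1'), struct('seven','1'));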
where struct(`id`, `lineid`) IN ( struct('two',3), @@ -157,7 +158,7 @@ struct('nine',1), struct('ten',1) ); -explain +explain vectorization expression select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct('two',3), @@ -189,7 +190,7 @@ create table test_4 (`my_bigint` bigint, `my_string` string, `my_double` double) insert into table test_4 values (1, "b", 1.5), (1, "a", 0.5), (2, "b", 1.5); -explain +explain vectorization expression select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`) IN ( struct(1L, "a", 1.5D), @@ -218,7 +219,7 @@ struct(1L, "a", 0.5D), struct(3L, "b", 1.5D) ); -explain +explain vectorization expression select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`) IN ( struct(1L, "a", 1.5D), diff --git ql/src/test/queries/clientpositive/vector_tablesample_rows.q ql/src/test/queries/clientpositive/vector_tablesample_rows.q index 4deb1c8..94b2f5b 100644 --- ql/src/test/queries/clientpositive/vector_tablesample_rows.q +++ ql/src/test/queries/clientpositive/vector_tablesample_rows.q @@ -4,7 +4,7 @@ SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; set hive.mapred.mode=nonstrict; -explain +explain vectorization expression select 'key1', 'value1' from alltypesorc tablesample (1 rows); select 'key1', 'value1' from alltypesorc tablesample (1 rows); @@ -12,7 +12,7 @@ select 'key1', 'value1' from alltypesorc tablesample (1 rows); create table decimal_2 (t decimal(18,9)) stored as orc; -explain +explain vectorization expression insert overwrite table decimal_2 select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows); @@ -25,12 +25,12 @@ drop table decimal_2; -- Dummy tables HIVE-13190 -explain +explain vectorization expression select count(1) from (select * from (Select 1 a) x order by x.a) y; select count(1) from (select * from (Select 1 a) x order by x.a) y; -explain +explain vectorization expression create temporary table dual as select 1; create temporary table dual as select 1; diff --git ql/src/test/queries/clientpositive/vector_udf2.q ql/src/test/queries/clientpositive/vector_udf2.q index b926c4f..e62af6a 100644 --- ql/src/test/queries/clientpositive/vector_udf2.q +++ ql/src/test/queries/clientpositive/vector_udf2.q @@ -7,7 +7,7 @@ create table varchar_udf_2 (c1 string, c2 string, c3 varchar(10), c4 varchar(20) insert overwrite table varchar_udf_2 select key, value, key, value from src where key = '238' limit 1; -explain +explain vectorization expression select c1 LIKE '%38%', c2 LIKE 'val_%', diff --git ql/src/test/queries/clientpositive/vector_udf3.q ql/src/test/queries/clientpositive/vector_udf3.q index 8a4df79..bc3a5e1 100644 --- ql/src/test/queries/clientpositive/vector_udf3.q +++ ql/src/test/queries/clientpositive/vector_udf3.q @@ -1,10 +1,11 @@ ADD JAR ivy://org.apache.hive.hive-it-custom-udfs:udf-vectorized-badexample:+; +set hive.fetch.task.conversion=none; CREATE TEMPORARY FUNCTION rot13 as 'hive.it.custom.udfs.GenericUDFRot13'; set hive.vectorized.execution.enabled=true; -EXPLAIN SELECT rot13(cstring1) from alltypesorc; +EXPLAIN VECTORIZATION EXPRESSION SELECT rot13(cstring1) from alltypesorc; SELECT cstring1, rot13(cstring1) from alltypesorc order by cstring1 desc limit 10; diff --git ql/src/test/queries/clientpositive/vector_varchar_4.q ql/src/test/queries/clientpositive/vector_varchar_4.q index 32a74a4..80f84d8 100644 --- ql/src/test/queries/clientpositive/vector_varchar_4.q +++ ql/src/test/queries/clientpositive/vector_varchar_4.q @@ -1,5 +1,6 @@ set 
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 drop table if exists vectortab2k;
 drop table if exists vectortab2korc;
@@ -44,7 +45,7 @@ INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;
 
 drop table if exists varchar_lazy_binary_columnar;
 create table varchar_lazy_binary_columnar(vt varchar(10), vsi varchar(10), vi varchar(20), vb varchar(30), vf varchar(20),vd varchar(20),vs varchar(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile;
 
-explain
+explain vectorization expression
 insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc;
 
 -- insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc;
diff --git ql/src/test/queries/clientpositive/vector_varchar_mapjoin1.q ql/src/test/queries/clientpositive/vector_varchar_mapjoin1.q
index ac0570e..285d2ac 100644
--- ql/src/test/queries/clientpositive/vector_varchar_mapjoin1.q
+++ ql/src/test/queries/clientpositive/vector_varchar_mapjoin1.q
@@ -37,15 +37,15 @@ create table varchar_join1_vc2_orc stored as orc as select * from varchar_join1_
 create table varchar_join1_str_orc stored as orc as select * from varchar_join1_str;
 
 -- Join varchar with same length varchar
-explain select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1;
+explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1;
 
 select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1;
 
 -- Join varchar with different length varchar
-explain select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1;
+explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1;
 
 select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1;
 
 -- Join varchar with string
-explain select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1;
+explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1;
 
 select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1;
 
 drop table varchar_join1_vc1;
diff --git ql/src/test/queries/clientpositive/vector_varchar_simple.q ql/src/test/queries/clientpositive/vector_varchar_simple.q
index acd6598..6f753a7 100644
--- ql/src/test/queries/clientpositive/vector_varchar_simple.q
+++ ql/src/test/queries/clientpositive/vector_varchar_simple.q
@@ -1,5 +1,7 @@
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
 drop table varchar_2;
 
 create table varchar_2 (
@@ -14,7 +16,7 @@ from src
 order by key asc
 limit 5;
 
-explain select key, value
+explain vectorization select key, value
 from varchar_2
 order by key asc
 limit 5;
@@ -30,7 +32,7 @@ from src
 order by key desc
 limit 5;
 
-explain select key, value
+explain vectorization select key, value
 from varchar_2
 order by key desc
 limit 5;
@@ -48,7 +50,7 @@ create table varchar_3 (
   field varchar(25)
 ) stored as orc;
 
-explain
+explain vectorization expression
 insert into table varchar_3 select cint from alltypesorc limit 10;
 
 insert into table varchar_3 select cint from alltypesorc limit 10;
diff --git ql/src/test/queries/clientpositive/vector_when_case_null.q ql/src/test/queries/clientpositive/vector_when_case_null.q
index a423b60..4acd6dc 100644
--- ql/src/test/queries/clientpositive/vector_when_case_null.q
+++ ql/src/test/queries/clientpositive/vector_when_case_null.q
@@ -8,7 +8,7 @@ set hive.fetch.task.conversion=none;
 create table count_case_groupby (key string, bool boolean) STORED AS orc;
 insert into table count_case_groupby values ('key1', true),('key2', false),('key3', NULL),('key4', false),('key5',NULL);
 
-explain
+explain vectorization expression
 SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key;
 
 SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/vectorization_0.q ql/src/test/queries/clientpositive/vectorization_0.q
index f4cf8c4..c97cd9f 100644
--- ql/src/test/queries/clientpositive/vectorization_0.q
+++ ql/src/test/queries/clientpositive/vectorization_0.q
@@ -1,11 +1,12 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
 -- Use ORDER BY clauses to generate 2 stages.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ctinyint) as c1,
        MAX(ctinyint),
        COUNT(ctinyint),
@@ -20,7 +21,7 @@ SELECT MIN(ctinyint) as c1,
 FROM alltypesorc
 ORDER BY c1;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(ctinyint) as c1
 FROM alltypesorc
 ORDER BY c1;
@@ -29,7 +30,7 @@ SELECT SUM(ctinyint) as c1
 FROM alltypesorc
 ORDER BY c1;
 
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT
   avg(ctinyint) as c1,
   variance(ctinyint),
@@ -54,7 +55,7 @@ SELECT
 FROM alltypesorc
 ORDER BY c1;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(cbigint) as c1,
        MAX(cbigint),
        COUNT(cbigint),
@@ -69,7 +70,7 @@ SELECT MIN(cbigint) as c1,
 FROM alltypesorc
 ORDER BY c1;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(cbigint) as c1
 FROM alltypesorc
 ORDER BY c1;
@@ -78,7 +79,7 @@ SELECT SUM(cbigint) as c1
 FROM alltypesorc
 ORDER BY c1;
 
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT
   avg(cbigint) as c1,
   variance(cbigint),
@@ -103,7 +104,7 @@ SELECT
 FROM alltypesorc
 ORDER BY c1;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(cfloat) as c1,
        MAX(cfloat),
        COUNT(cfloat),
@@ -118,7 +119,7 @@ SELECT MIN(cfloat) as c1,
 FROM alltypesorc
 ORDER BY c1;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(cfloat) as c1
 FROM alltypesorc
 ORDER BY c1;
@@ -127,7 +128,7 @@ SELECT SUM(cfloat) as c1
 FROM alltypesorc
 ORDER BY c1;
 
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT
   avg(cfloat) as c1,
   variance(cfloat),
@@ -152,7 +153,7 @@ SELECT
 FROM alltypesorc
 ORDER BY c1;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT AVG(cbigint),
        (-(AVG(cbigint))),
        (-6432 + AVG(cbigint)),
diff --git ql/src/test/queries/clientpositive/vectorization_1.q ql/src/test/queries/clientpositive/vectorization_1.q
index 8fdcb27..f71218f 100644
--- ql/src/test/queries/clientpositive/vectorization_1.q
+++ ql/src/test/queries/clientpositive/vectorization_1.q
@@ -1,4 +1,5 @@
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
diff --git ql/src/test/queries/clientpositive/vectorization_10.q ql/src/test/queries/clientpositive/vectorization_10.q
index 778250a..c5f4d43 100644
--- ql/src/test/queries/clientpositive/vectorization_10.q
+++ ql/src/test/queries/clientpositive/vectorization_10.q
@@ -1,4 +1,5 @@
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
diff --git ql/src/test/queries/clientpositive/vectorization_11.q ql/src/test/queries/clientpositive/vectorization_11.q
index 4ac42ac..3830ea9 100644
--- ql/src/test/queries/clientpositive/vectorization_11.q
+++ ql/src/test/queries/clientpositive/vectorization_11.q
@@ -1,4 +1,5 @@
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
diff --git ql/src/test/queries/clientpositive/vectorization_12.q ql/src/test/queries/clientpositive/vectorization_12.q
index bc31f3c..0728ba9 100644
--- ql/src/test/queries/clientpositive/vectorization_12.q
+++ ql/src/test/queries/clientpositive/vectorization_12.q
@@ -1,5 +1,6 @@
 set hive.mapred.mode=nonstrict;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
diff --git ql/src/test/queries/clientpositive/vectorization_13.q ql/src/test/queries/clientpositive/vectorization_13.q
index 005808b..84ae994 100644
--- ql/src/test/queries/clientpositive/vectorization_13.q
+++ ql/src/test/queries/clientpositive/vectorization_13.q
@@ -1,11 +1,11 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1,
        ctinyint,
        ctimestamp1,
@@ -71,7 +71,7 @@ ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5,
 LIMIT 40;
 
 -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1,
        ctinyint,
        ctimestamp1,
diff --git ql/src/test/queries/clientpositive/vectorization_14.q ql/src/test/queries/clientpositive/vectorization_14.q
index 4796c18..825fd63 100644
--- ql/src/test/queries/clientpositive/vectorization_14.q
+++ ql/src/test/queries/clientpositive/vectorization_14.q
@@ -1,10 +1,11 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT ctimestamp1,
        cfloat,
        cstring1,
diff --git ql/src/test/queries/clientpositive/vectorization_15.q ql/src/test/queries/clientpositive/vectorization_15.q
index 21ba8c8..5c48c58 100644
--- ql/src/test/queries/clientpositive/vectorization_15.q
+++ ql/src/test/queries/clientpositive/vectorization_15.q
@@ -1,10 +1,11 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT cfloat,
        cboolean1,
        cdouble,
diff --git ql/src/test/queries/clientpositive/vectorization_16.q ql/src/test/queries/clientpositive/vectorization_16.q
index 11b709f..822c824 100644
--- ql/src/test/queries/clientpositive/vectorization_16.q
+++ ql/src/test/queries/clientpositive/vectorization_16.q
@@ -1,10 +1,11 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT cstring1,
        cdouble,
        ctimestamp1,
diff --git ql/src/test/queries/clientpositive/vectorization_17.q ql/src/test/queries/clientpositive/vectorization_17.q
index 1306f6b..57cdc41 100644
--- ql/src/test/queries/clientpositive/vectorization_17.q
+++ ql/src/test/queries/clientpositive/vectorization_17.q
@@ -1,10 +1,11 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT cfloat,
        cstring1,
        cint,
diff --git ql/src/test/queries/clientpositive/vectorization_2.q ql/src/test/queries/clientpositive/vectorization_2.q
index f232815..4941d1e 100644
--- ql/src/test/queries/clientpositive/vectorization_2.q
+++ ql/src/test/queries/clientpositive/vectorization_2.q
@@ -1,4 +1,5 @@
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
diff --git ql/src/test/queries/clientpositive/vectorization_3.q ql/src/test/queries/clientpositive/vectorization_3.q
index d6e6580..2e0350a 100644
--- ql/src/test/queries/clientpositive/vectorization_3.q
+++ ql/src/test/queries/clientpositive/vectorization_3.q
@@ -1,5 +1,6 @@
 set hive.mapred.mode=nonstrict;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
diff --git ql/src/test/queries/clientpositive/vectorization_4.q ql/src/test/queries/clientpositive/vectorization_4.q
index 3151cf0..ba603c8 100644
--- ql/src/test/queries/clientpositive/vectorization_4.q
+++ ql/src/test/queries/clientpositive/vectorization_4.q
@@ -1,4 +1,5 @@
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
diff --git ql/src/test/queries/clientpositive/vectorization_5.q ql/src/test/queries/clientpositive/vectorization_5.q
index 773f4b3..e2d4d0a 100644
--- ql/src/test/queries/clientpositive/vectorization_5.q
+++ ql/src/test/queries/clientpositive/vectorization_5.q
@@ -1,4 +1,5 @@
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
diff --git ql/src/test/queries/clientpositive/vectorization_6.q ql/src/test/queries/clientpositive/vectorization_6.q
index 803f592..f55a2fb 100644
--- ql/src/test/queries/clientpositive/vectorization_6.q
+++ ql/src/test/queries/clientpositive/vectorization_6.q
@@ -1,4 +1,5 @@
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
diff --git ql/src/test/queries/clientpositive/vectorization_7.q ql/src/test/queries/clientpositive/vectorization_7.q
index 131f570..bf3a1c2 100644
--- ql/src/test/queries/clientpositive/vectorization_7.q
+++ ql/src/test/queries/clientpositive/vectorization_7.q
@@ -1,11 +1,11 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1,
        cbigint,
        csmallint,
@@ -60,7 +60,7 @@ LIMIT 25;
 
 -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1,
        cbigint,
        csmallint,
diff --git ql/src/test/queries/clientpositive/vectorization_8.q ql/src/test/queries/clientpositive/vectorization_8.q
index 2d357f1..d43db26 100644
--- ql/src/test/queries/clientpositive/vectorization_8.q
+++ ql/src/test/queries/clientpositive/vectorization_8.q
@@ -1,11 +1,11 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT ctimestamp1,
        cdouble,
        cboolean1,
@@ -56,7 +56,7 @@ LIMIT 20;
 
 -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT ctimestamp1,
        cdouble,
        cboolean1,
diff --git ql/src/test/queries/clientpositive/vectorization_9.q ql/src/test/queries/clientpositive/vectorization_9.q
index 11b709f..822c824 100644
--- ql/src/test/queries/clientpositive/vectorization_9.q
+++ ql/src/test/queries/clientpositive/vectorization_9.q
@@ -1,10 +1,11 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT cstring1,
        cdouble,
        ctimestamp1,
diff --git ql/src/test/queries/clientpositive/vectorization_decimal_date.q ql/src/test/queries/clientpositive/vectorization_decimal_date.q
index 854ee20..29c025c 100644
--- ql/src/test/queries/clientpositive/vectorization_decimal_date.q
+++ ql/src/test/queries/clientpositive/vectorization_decimal_date.q
@@ -1,5 +1,7 @@
 set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+
 CREATE TABLE date_decimal_test STORED AS ORC AS SELECT cint, cdouble, CAST (CAST (cint AS TIMESTAMP) AS DATE) AS cdate, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal FROM alltypesorc;
 SET hive.vectorized.execution.enabled=true;
-EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10;
+EXPLAIN VECTORIZATION EXPRESSION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10;
 SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10;
diff --git ql/src/test/queries/clientpositive/vectorization_div0.q ql/src/test/queries/clientpositive/vectorization_div0.q
index 05d81d0..025d457 100644
--- ql/src/test/queries/clientpositive/vectorization_div0.q
+++ ql/src/test/queries/clientpositive/vectorization_div0.q
@@ -1,16 +1,17 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled = true;
+set hive.fetch.task.conversion=none;
 
 -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants
-explain
+explain vectorization expression
 select cdouble / 0.0 from alltypesorc limit 100;
 select cdouble / 0.0 from alltypesorc limit 100;
 
 -- There are no zeros in the table, but there is 988888, so use it as zero
 
 -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators
-explain
+explain vectorization expression
 select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L)
 from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100;
 select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L)
@@ -18,7 +19,7 @@ from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit
 
 -- There are no zeros in the table, but there is -200.0, so use it as zero
 
-explain
+explain vectorization expression
 select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0)
 from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100;
 select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0)
diff --git ql/src/test/queries/clientpositive/vectorization_limit.q ql/src/test/queries/clientpositive/vectorization_limit.q
index 707f1ed..a4c54f2 100644
--- ql/src/test/queries/clientpositive/vectorization_limit.q
+++ ql/src/test/queries/clientpositive/vectorization_limit.q
@@ -1,7 +1,9 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7;
+set hive.fetch.task.conversion=none;
+
+explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7;
 SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7;
 
 set hive.optimize.reducededuplication.min.reducer=1;
@@ -9,31 +11,31 @@ set hive.limit.pushdown.memory.usage=0.3f;
 
 -- HIVE-3562 Some limit can be pushed down to map stage - c/p parts from limit_pushdown
 
-explain
+explain vectorization expression
 select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20;
 select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20;
 
 -- deduped RS
-explain
+explain vectorization expression
 select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20;
 select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20;
 
 -- distincts
-explain
+explain vectorization expression
 select distinct(ctinyint) from alltypesorc limit 20;
 select distinct(ctinyint) from alltypesorc limit 20;
 
-explain
+explain vectorization expression
 select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20;
 select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20;
 
 -- limit zero
-explain
+explain vectorization expression
 select ctinyint,cdouble from alltypesorc order by ctinyint limit 0;
 select ctinyint,cdouble from alltypesorc order by ctinyint limit 0;
 
 -- 2MR (applied to last RS)
-explain
+explain vectorization expression
 select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20;
 select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20;
diff --git ql/src/test/queries/clientpositive/vectorization_nested_udf.q ql/src/test/queries/clientpositive/vectorization_nested_udf.q
index bb50f9b..da8f99c 100644
--- ql/src/test/queries/clientpositive/vectorization_nested_udf.q
+++ ql/src/test/queries/clientpositive/vectorization_nested_udf.q
@@ -1,3 +1,5 @@
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
 SELECT SUM(abs(ctinyint)) from alltypesorc;
diff --git ql/src/test/queries/clientpositive/vectorization_not.q ql/src/test/queries/clientpositive/vectorization_not.q
index 7ac507b..aa691ab 100644
--- ql/src/test/queries/clientpositive/vectorization_not.q
+++ ql/src/test/queries/clientpositive/vectorization_not.q
@@ -1,5 +1,7 @@
 set hive.mapred.mode=nonstrict;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
 SELECT AVG(cbigint),
        (-(AVG(cbigint))),
        (-6432 + AVG(cbigint)),
diff --git ql/src/test/queries/clientpositive/vectorization_offset_limit.q ql/src/test/queries/clientpositive/vectorization_offset_limit.q
index 3d01154..97e1a05 100644
--- ql/src/test/queries/clientpositive/vectorization_offset_limit.q
+++ ql/src/test/queries/clientpositive/vectorization_offset_limit.q
@@ -1,10 +1,11 @@
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 set hive.mapred.mode=nonstrict;
+set hive.fetch.task.conversion=none;
 
-explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2;
+explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2;
 SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2;
 
-explain
+explain vectorization expression
 select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3;
 select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/vectorization_part.q ql/src/test/queries/clientpositive/vectorization_part.q
index 8d677db..9f4fc6e 100644
--- ql/src/test/queries/clientpositive/vectorization_part.q
+++ ql/src/test/queries/clientpositive/vectorization_part.q
@@ -1,5 +1,7 @@
 set hive.mapred.mode=nonstrict;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
 CREATE TABLE alltypesorc_part(ctinyint tinyint, csmallint smallint, cint int, cbigint bigint, cfloat float, cdouble double, cstring1 string, cstring2 string, ctimestamp1 timestamp, ctimestamp2 timestamp, cboolean1 boolean, cboolean2 boolean) partitioned by (ds string) STORED AS ORC;
 insert overwrite table alltypesorc_part partition (ds='2011') select * from alltypesorc limit 100;
 insert overwrite table alltypesorc_part partition (ds='2012') select * from alltypesorc limit 100;
diff --git ql/src/test/queries/clientpositive/vectorization_part_project.q ql/src/test/queries/clientpositive/vectorization_part_project.q
index 3a48f20..d0dcb6f 100644
--- ql/src/test/queries/clientpositive/vectorization_part_project.q
+++ ql/src/test/queries/clientpositive/vectorization_part_project.q
@@ -1,9 +1,11 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
 CREATE TABLE alltypesorc_part(ctinyint tinyint, csmallint smallint, cint int, cbigint bigint, cfloat float, cdouble double, cstring1 string, cstring2 string, ctimestamp1 timestamp, ctimestamp2 timestamp, cboolean1 boolean, cboolean2 boolean) partitioned by (ds string) STORED AS ORC;
 insert overwrite table alltypesorc_part partition (ds='2011') select * from alltypesorc order by ctinyint, cint, cbigint limit 100;
 insert overwrite table alltypesorc_part partition (ds='2012') select * from alltypesorc order by ctinyint, cint, cbigint limit 100;
 
-explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10;
+explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10;
 select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10;
diff --git ql/src/test/queries/clientpositive/vectorization_part_varchar.q ql/src/test/queries/clientpositive/vectorization_part_varchar.q
index d371de8..28646b9 100644
--- ql/src/test/queries/clientpositive/vectorization_part_varchar.q
+++ ql/src/test/queries/clientpositive/vectorization_part_varchar.q
@@ -1,5 +1,7 @@
 set hive.mapred.mode=nonstrict;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
 CREATE TABLE alltypesorc_part_varchar(ctinyint tinyint, csmallint smallint, cint int, cbigint bigint, cfloat float, cdouble double, cstring1 string, cstring2 string, ctimestamp1 timestamp, ctimestamp2 timestamp, cboolean1 boolean, cboolean2 boolean) partitioned by (ds varchar(4)) STORED AS ORC;
 insert overwrite table alltypesorc_part_varchar partition (ds='2011') select * from alltypesorc limit 100;
 insert overwrite table alltypesorc_part_varchar partition (ds='2012') select * from alltypesorc limit 100;
diff --git ql/src/test/queries/clientpositive/vectorization_pushdown.q ql/src/test/queries/clientpositive/vectorization_pushdown.q
index b33cfa7..8acb193 100644
--- ql/src/test/queries/clientpositive/vectorization_pushdown.q
+++ ql/src/test/queries/clientpositive/vectorization_pushdown.q
@@ -2,5 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.optimize.index.filter=true;
-explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble;
+set hive.fetch.task.conversion=none;
+
+explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble;
 SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble;
diff --git ql/src/test/queries/clientpositive/vectorization_short_regress.q ql/src/test/queries/clientpositive/vectorization_short_regress.q
index 114a3e2..03e4bbc 100644
--- ql/src/test/queries/clientpositive/vectorization_short_regress.q
+++ ql/src/test/queries/clientpositive/vectorization_short_regress.q
@@ -2,7 +2,7 @@ set hive.compute.query.using.stats=false;
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
@@ -36,7 +36,8 @@ set hive.fetch.task.conversion=minimal;
 -- ArithmeticOps: Add, Multiply, Subtract, Divide
 -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual
 -- GroupBy: NoGroupByProjectAggs
-EXPLAIN SELECT AVG(cint),
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT AVG(cint),
        (AVG(cint) + -3728),
        (-((AVG(cint) + -3728))),
        (-((-((AVG(cint) + -3728))))),
@@ -112,7 +113,8 @@ WHERE ((762 = cbigint)
 -- ArithmeticOps: Divide, Multiply, Remainder, Subtract
 -- FilterOps: LessThan, LessThanOrEqual, GreaterThan, GreaterThanOrEqual, Like, RLike
 -- GroupBy: NoGroupByProjectAggs
-EXPLAIN SELECT MAX(cint),
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT MAX(cint),
        (MAX(cint) / -3728),
        (MAX(cint) * -3728),
        VAR_POP(cbigint),
@@ -182,7 +184,8 @@ WHERE (((cbigint <= 197)
 -- ArithmeticOps: Subtract, Remainder, Multiply, Add
 -- FilterOps: Equal, LessThanOrEqual, GreaterThan, Like, LessThan
 -- GroupBy: NoGroupByProjectAggs
-EXPLAIN SELECT VAR_POP(cbigint),
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT VAR_POP(cbigint),
        (-(VAR_POP(cbigint))),
        (VAR_POP(cbigint) - (-(VAR_POP(cbigint)))),
        COUNT(*),
@@ -250,7 +253,8 @@ WHERE ((ctimestamp1 = ctimestamp2)
 -- ArithmeticOps: Add, Divide, Remainder, Multiply
 -- FilterOps: LessThanOrEqual, NotEqual, GreaterThanOrEqual, LessThan, Equal
 -- GroupBy: NoGroupByProjectAggs
-EXPLAIN SELECT AVG(ctinyint),
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT AVG(ctinyint),
        (AVG(ctinyint) + 6981),
        ((AVG(ctinyint) + 6981) + AVG(ctinyint)),
        MAX(cbigint),
@@ -298,7 +302,8 @@ WHERE (((ctimestamp2 <= ctimestamp1)
 -- ArithmeticOps: Multiply, Subtract, Add, Divide
 -- FilterOps: Like, NotEqual, LessThan, GreaterThanOrEqual, GreaterThan, RLike
 -- GroupBy: NoGroupByProjectColumns
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT cint,
        cdouble,
        ctimestamp2,
        cstring1,
@@ -376,7 +381,8 @@ LIMIT 50;
 -- ArithmeticOps: Divide, Remainder, Subtract, Multiply
 -- FilterOps: Equal, LessThanOrEqual, LessThan, Like, GreaterThanOrEqual, NotEqual, GreaterThan
 -- GroupBy: NoGroupByProjectColumns
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT cint,
        cbigint,
        cstring1,
        cboolean1,
@@ -451,7 +457,8 @@ LIMIT 25;
 -- ArithmeticOps: Add, Subtract, Divide, Multiply, Remainder
 -- FilterOps: NotEqual, GreaterThanOrEqual, Like, LessThanOrEqual, Equal, GreaterThan
 -- GroupBy: NoGroupByProjectColumns
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT cint,
        cstring1,
        cboolean2,
        ctimestamp2,
@@ -524,7 +531,8 @@ LIMIT 75;
 -- ArithmeticOps: Divide, Subtract, Multiply, Remainder
 -- FilterOps: GreaterThan, LessThan, LessThanOrEqual, GreaterThanOrEqual, Like
 -- GroupBy: NoGroupByProjectColumns
-EXPLAIN SELECT ctimestamp1,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT ctimestamp1,
        cstring2,
        cdouble,
        cfloat,
@@ -583,7 +591,8 @@ LIMIT 45;
 -- ArithmeticOps: Remainder, Divide, Subtract
 -- FilterOps: GreaterThanOrEqual, Equal, LessThanOrEqual
 -- GroupBy: GroupBy
-EXPLAIN SELECT csmallint,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT csmallint,
        (csmallint % -75) as c1,
        STDDEV_SAMP(csmallint) as c2,
        (-1.389 / csmallint) as c3,
@@ -628,7 +637,8 @@ LIMIT 20;
 -- ArithmeticOps: Multiply, Add, Subtract, Remainder
 -- FilterOps: GreaterThan, LessThan, Equal, LessThanOrEqual, GreaterThanOrEqual
 -- GroupBy: GroupBy
-EXPLAIN SELECT cdouble,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT cdouble,
        VAR_SAMP(cdouble),
        (2563.58 * VAR_SAMP(cdouble)),
        (-(VAR_SAMP(cdouble))),
@@ -686,7 +696,8 @@ ORDER BY cdouble;
 -- ArithmeticOps: Multiply, Subtract, Add, Divide, Remainder
 -- FilterOps: NotEqual, LessThan, Like, Equal, RLike
 -- GroupBy: GroupBy
-EXPLAIN SELECT ctimestamp1,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT ctimestamp1,
        cstring1,
        STDDEV_POP(cint) as c1,
        (STDDEV_POP(cint) * 10.175) as c2,
@@ -801,7 +812,8 @@ LIMIT 50;
 -- ArithmeticOps: Divide, Subtract, Remainder, Add, Multiply
 -- FilterOps: GreaterThan, LessThanOrEqual, Equal, LessThan, GreaterThanOrEqual, NotEqual, Like, RLike
 -- GroupBy: GroupBy
-EXPLAIN SELECT cboolean1,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT cboolean1,
        MAX(cfloat),
        (-(MAX(cfloat))),
        (-26.28 / MAX(cfloat)),
@@ -883,12 +895,12 @@ ORDER BY cboolean1;
 
 -- These tests verify COUNT on empty or null colulmns work correctly.
 create table test_count(i int) stored as orc;
 
-explain
+explain vectorization expression
 select count(*) from test_count;
 
 select count(*) from test_count;
 
-explain
+explain vectorization expression
 select count(i) from test_count;
 
 select count(i) from test_count;
@@ -911,32 +923,32 @@ insert into table alltypesnull select null, null, null, null, null, null, null,
 
 create table alltypesnullorc stored as orc as select * from alltypesnull;
 
-explain
+explain vectorization expression
 select count(*) from alltypesnullorc;
 
 select count(*) from alltypesnullorc;
 
-explain
+explain vectorization expression
 select count(ctinyint) from alltypesnullorc;
 
 select count(ctinyint) from alltypesnullorc;
 
-explain
+explain vectorization expression
 select count(cint) from alltypesnullorc;
 
 select count(cint) from alltypesnullorc;
 
-explain
+explain vectorization expression
 select count(cfloat) from alltypesnullorc;
 
 select count(cfloat) from alltypesnullorc;
 
-explain
+explain vectorization expression
 select count(cstring1) from alltypesnullorc;
 
 select count(cstring1) from alltypesnullorc;
 
-explain
+explain vectorization expression
 select count(cboolean1) from alltypesnullorc;
 
 select count(cboolean1) from alltypesnullorc;
diff --git ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q
index 022ce2e..191d8c6 100644
--- ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q
+++ ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q
@@ -1,4 +1,6 @@
 set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+
 create table vsmb_bucket_1(key int, value string)
   CLUSTERED BY (key)
   SORTED BY (key) INTO 1 BUCKETS
@@ -28,11 +30,11 @@ set hive.optimize.bucketmapjoin = true;
 set hive.optimize.bucketmapjoin.sortedmerge = true;
 set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
 
-explain
+explain vectorization expression
 select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key;
 select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key;
 
-explain
+explain vectorization expression
 select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key;
 select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key;
 
@@ -41,6 +43,6 @@ select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b
 -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
 -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
 
-explain
+explain vectorization expression
 select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key;
 select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key;
diff --git ql/src/test/queries/clientpositive/vectorized_case.q ql/src/test/queries/clientpositive/vectorized_case.q
index e74bf82..2efacb4 100644
--- ql/src/test/queries/clientpositive/vectorized_case.q
+++ ql/src/test/queries/clientpositive/vectorized_case.q
@@ -2,7 +2,7 @@ set hive.explain.user=false;
 set hive.fetch.task.conversion=none;
 set hive.vectorized.execution.enabled = true ;
 
-explain
+explain vectorization expression
 select
   csmallint,
   case
@@ -37,7 +37,7 @@ where csmallint = 418
 or csmallint = 12205
 or csmallint = 10583
 ;
 
-explain
+explain vectorization expression
 select
   csmallint,
   case
diff --git ql/src/test/queries/clientpositive/vectorized_casts.q ql/src/test/queries/clientpositive/vectorized_casts.q
index 0880e29..a32c150 100644
--- ql/src/test/queries/clientpositive/vectorized_casts.q
+++ ql/src/test/queries/clientpositive/vectorized_casts.q
@@ -8,7 +8,7 @@ SET hive.vectorized.execution.enabled = true;
 -- Currently, vectorization is not supported in fetch task (hive.fetch.task.conversion=none)
 -- Test type casting in vectorized mode to verify end-to-end functionality.
 
-explain
+explain vectorization
 select
 -- to boolean
    cast (ctinyint as boolean)
diff --git ql/src/test/queries/clientpositive/vectorized_context.q ql/src/test/queries/clientpositive/vectorized_context.q
index 657270e..0558bc0 100644
--- ql/src/test/queries/clientpositive/vectorized_context.q
+++ ql/src/test/queries/clientpositive/vectorized_context.q
@@ -1,5 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+
 create table store(s_store_sk int, s_city string)
 stored as orc;
 insert overwrite table store
@@ -26,7 +28,7 @@ set hive.vectorized.execution.enabled=true;
 
 set hive.mapjoin.hybridgrace.hashtable=false;
 
-explain
+explain vectorization
 select store.s_city, ss_net_profit
 from store_sales
 JOIN store ON store_sales.ss_store_sk = store.s_store_sk
diff --git ql/src/test/queries/clientpositive/vectorized_date_funcs.q ql/src/test/queries/clientpositive/vectorized_date_funcs.q
index 899e922..4d56198 100644
--- ql/src/test/queries/clientpositive/vectorized_date_funcs.q
+++ ql/src/test/queries/clientpositive/vectorized_date_funcs.q
@@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled = true;
 set hive.cli.print.header=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
@@ -25,7 +26,7 @@ INSERT INTO TABLE date_udf_flight_orc SELECT fl_date, to_utc_timestamp(fl_date,
 
 SELECT * FROM date_udf_flight_orc;
 
-EXPLAIN SELECT
+EXPLAIN VECTORIZATION EXPRESSION SELECT
   fl_time,
   to_unix_timestamp(fl_time),
   year(fl_time),
@@ -71,7 +72,7 @@ SELECT
   datediff(fl_time, timestamp "2007-03-14 08:21:59")
 FROM date_udf_flight_orc;
 
-EXPLAIN SELECT
+EXPLAIN VECTORIZATION EXPRESSION SELECT
   fl_date,
   to_unix_timestamp(fl_date),
   year(fl_date),
@@ -117,7 +118,7 @@ SELECT
   datediff(fl_date, timestamp "2007-03-14 08:21:59")
 FROM date_udf_flight_orc;
 
-EXPLAIN SELECT
+EXPLAIN VECTORIZATION EXPRESSION SELECT
  fl_time,
  fl_date,
  year(fl_time) = year(fl_date),
@@ -168,7 +169,7 @@ SELECT
   datediff(fl_date, "2007-03-14") = datediff(fl_date, date "2007-03-14")
 FROM date_udf_flight_orc;
 
-EXPLAIN SELECT
+EXPLAIN VECTORIZATION EXPRESSION SELECT
   fl_date,
   to_date(date_add(fl_date, 2)),
   to_date(date_sub(fl_date, 2)),
@@ -189,7 +190,7 @@ FROM date_udf_flight_orc LIMIT 10;
 
 -- Test extracting the date part of expression that includes time
 SELECT to_date('2009-07-30 04:17:52') FROM date_udf_flight_orc LIMIT 1;
 
-EXPLAIN SELECT
+EXPLAIN VECTORIZATION EXPRESSION SELECT
   min(fl_date) AS c1,
   max(fl_date),
   count(fl_date),
diff --git ql/src/test/queries/clientpositive/vectorized_distinct_gby.q ql/src/test/queries/clientpositive/vectorized_distinct_gby.q
index 6900dc0..4339a5f 100644
--- ql/src/test/queries/clientpositive/vectorized_distinct_gby.q
+++ ql/src/test/queries/clientpositive/vectorized_distinct_gby.q
@@ -1,14 +1,15 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 SET hive.map.groupby.sorted=true;
 
 create table dtest(a int, b int) clustered by (a) sorted by (a) into 1 buckets stored as orc;
 insert into table dtest select c,b from (select array(300,300,300,300,300) as a, 1 as b from src order by a limit 1) y lateral view explode(a) t1 as c;
 
-explain select sum(distinct a), count(distinct a) from dtest;
+explain vectorization select sum(distinct a), count(distinct a) from dtest;
 select sum(distinct a), count(distinct a) from dtest;
 
-explain select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc;
+explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc;
 select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc;
diff --git ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
index 2d3788d..d2ded71 100644
--- ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
+++ ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
@@ -7,33 +7,34 @@ set hive.tez.dynamic.partition.pruning=true;
 set hive.optimize.metadataonly=false;
 set hive.optimize.index.filter=true;
 set hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 select distinct ds from srcpart;
 select distinct hr from srcpart;
 
-EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds;
+EXPLAIN VECTORIZATION create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds;
 create table srcpart_date stored as orc as select ds as ds, ds as `date` from srcpart group by ds;
 create table srcpart_hour stored as orc as select hr as hr, hr as hour from srcpart group by hr;
 create table srcpart_date_hour stored as orc as select ds as ds, ds as `date`, hr as hr, hr as hour from srcpart group by ds, hr;
 create table srcpart_double_hour stored as orc as select (hr*2) as hr, hr as hour from srcpart group by hr;
 
 -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 
 set hive.tez.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 
 set hive.tez.dynamic.partition.pruning=true;
 select count(*) from srcpart where ds = '2008-04-08';
 
 -- multiple sources, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 
 set hive.tez.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
@@ -41,77 +42,77 @@ set hive.tez.dynamic.partition.pruning=true;
 select count(*) from srcpart where hr = 11 and ds = '2008-04-08';
 
 -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 
 set hive.tez.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 
 set hive.tez.dynamic.partition.pruning=true;
 select count(*) from srcpart where ds = '2008-04-08' and hr = 11;
 
 -- empty set
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
 
 set hive.tez.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
 
 set hive.tez.dynamic.partition.pruning=true;
 select count(*) from srcpart where ds = 'I DONT EXIST';
 
 -- expressions
-EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
 select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
 
-EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
 select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
 
 set hive.tez.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
 select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
 
-EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
 select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
 
 set hive.tez.dynamic.partition.pruning=true;
 select count(*) from srcpart where hr = 11;
 
-EXPLAIN select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11;
 select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11;
 set hive.tez.dynamic.partition.pruning=true;
 select count(*) from srcpart where cast(hr as string) = 11;
 
 -- parent is reduce tasks
-EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08';
 select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08';
 select count(*) from srcpart where ds = '2008-04-08';
 
 -- non-equi join
-EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr);
+EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr);
 select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr);
 
 -- old style join syntax
-EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr;
+EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr;
 select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr;
 
 -- left join
-EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
-EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 
 -- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 
 -- with static pruning
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
 
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13;
 
 -- union + subquery
-EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
+EXPLAIN VECTORIZATION select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
 select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
 
-EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
+EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
 select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
 
-EXPLAIN select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
+EXPLAIN VECTORIZATION select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
 select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
 
 set hive.auto.convert.join=true;
@@ -119,60 +120,60 @@ set hive.auto.convert.join.noconditionaltask = true;
 set hive.auto.convert.join.noconditionaltask.size = 10000000;
 
 -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 select count(*) from srcpart where ds = '2008-04-08';
 
 -- multiple sources, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 select count(*) from srcpart where hr = 11 and ds = '2008-04-08';
 
 -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 select count(*) from srcpart where ds = '2008-04-08' and hr = 11;
 
 -- empty set
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
 
 -- expressions
-EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
 select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
-EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
 select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
 select count(*) from srcpart where hr = 11;
 
 set hive.stats.fetch.column.stats=false;
 -- parent is reduce tasks
-EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08';
 select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08';
 select count(*) from srcpart where ds = '2008-04-08';
 set hive.stats.fetch.column.stats=true;
 
 -- left join
-EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
-EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 
 -- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 
 -- with static pruning
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13;
 
 -- union + subquery
-EXPLAIN select distinct(ds) from srcpart where srcpart.ds
in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); @@ -185,7 +186,7 @@ set hive.vectorized.execution.enabled=false; set hive.exec.max.dynamic.partitions=1000; insert into table srcpart_orc partition (ds, hr) select key, value, ds, hr from srcpart; -EXPLAIN select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); +EXPLAIN VECTORIZATION select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); select count(*) from srcpart where (ds = '2008-04-08' or ds = '2008-04-09') and hr = 11; diff --git ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q index e1eefff..2aa4d02 100644 --- ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q +++ ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q @@ -16,27 +16,27 @@ create table dsrv_big stored as orc as select key as key_str, cast(key as int) a create table dsrv_small stored as orc as select distinct key as key_str, cast(key as int) as key_int, value from src where key < 100; -- single key (int) -EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int); +EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int); select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int); -- single key (string) -EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str); +EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str); select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str); -- keys are different type -EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str); +EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_str); select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_str); -- multiple tables -EXPLAIN select count(*) from dsrv_big a, dsrv_small b, dsrv_small c where a.key_int = b.key_int and a.key_int = c.key_int; +EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a, dsrv_small b, dsrv_small c where a.key_int = b.key_int and a.key_int = c.key_int; select count(*) from dsrv_big a, dsrv_small b, dsrv_small c where a.key_int = b.key_int and a.key_int = c.key_int; -- multiple keys -EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str and a.key_int = b.key_int); +EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str and a.key_int = b.key_int); select count(*) from dsrv_big a join dsrv_small b on
(a.key_str = b.key_str and a.key_int = b.key_int); -- small table result is empty -EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) where b.value in ('nonexistent1', 'nonexistent2'); +EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) where b.value in ('nonexistent1', 'nonexistent2'); select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) where b.value in ('nonexistent1', 'nonexistent2'); drop table dsrv_big; diff --git ql/src/test/queries/clientpositive/vectorized_mapjoin.q ql/src/test/queries/clientpositive/vectorized_mapjoin.q index 6500d41..138c133 100644 --- ql/src/test/queries/clientpositive/vectorized_mapjoin.q +++ ql/src/test/queries/clientpositive/vectorized_mapjoin.q @@ -4,10 +4,11 @@ SET hive.vectorized.execution.enabled=true; SET hive.auto.convert.join=true; SET hive.auto.convert.join.noconditionaltask=true; SET hive.auto.convert.join.noconditionaltask.size=1000000000; +set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint; diff --git ql/src/test/queries/clientpositive/vectorized_mapjoin2.q ql/src/test/queries/clientpositive/vectorized_mapjoin2.q index 137acbc..d259547 100644 --- ql/src/test/queries/clientpositive/vectorized_mapjoin2.q +++ ql/src/test/queries/clientpositive/vectorized_mapjoin2.q @@ -15,7 +15,7 @@ create temporary table y (b int) stored as orc; insert into x values(1); insert into y values(1); -explain +explain vectorization expression select count(1) from x, y where a = b; select count(1) from x, y where a = b; diff --git ql/src/test/queries/clientpositive/vectorized_math_funcs.q ql/src/test/queries/clientpositive/vectorized_math_funcs.q index b01c468..6a10770 100644 --- ql/src/test/queries/clientpositive/vectorized_math_funcs.q +++ ql/src/test/queries/clientpositive/vectorized_math_funcs.q @@ -1,9 +1,10 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled = true; +set hive.fetch.task.conversion=none; -- Test math functions in vectorized mode to verify they run correctly end-to-end. 
-explain +explain vectorization expression select cdouble ,Round(cdouble, 2) diff --git ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q index 4332898..5b07c9f 100644 --- ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q +++ ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q @@ -4,9 +4,10 @@ SET hive.vectorized.execution.enabled=true; SET hive.auto.convert.join=true; SET hive.auto.convert.join.noconditionaltask=true; SET hive.auto.convert.join.noconditionaltask.size=1000000000; +set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint; +explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint; select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint; diff --git ql/src/test/queries/clientpositive/vectorized_parquet.q ql/src/test/queries/clientpositive/vectorized_parquet.q index da138e0..e6ebdaa 100644 --- ql/src/test/queries/clientpositive/vectorized_parquet.q +++ ql/src/test/queries/clientpositive/vectorized_parquet.q @@ -21,7 +21,7 @@ insert overwrite table alltypes_parquet SET hive.vectorized.execution.enabled=true; -explain select * +explain vectorization select * from alltypes_parquet where cint = 528534767 limit 10; @@ -30,7 +30,7 @@ select * where cint = 528534767 limit 10; -explain select ctinyint, +explain vectorization select ctinyint, max(cint), min(csmallint), count(cstring1), diff --git ql/src/test/queries/clientpositive/vectorized_parquet_types.q ql/src/test/queries/clientpositive/vectorized_parquet_types.q index 297c5af..68761b6 100644 --- ql/src/test/queries/clientpositive/vectorized_parquet_types.q +++ ql/src/test/queries/clientpositive/vectorized_parquet_types.q @@ -48,19 +48,19 @@ SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, unhex(cbinary), cdecimal FROM parquet_types_staging; -- select -explain +explain vectorization expression SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types; SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types; -explain +explain vectorization expression SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types; SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types; -explain +explain vectorization expression SELECT ctinyint, MAX(cint), MIN(csmallint), diff --git ql/src/test/queries/clientpositive/vectorized_ptf.q ql/src/test/queries/clientpositive/vectorized_ptf.q index db2dbe1..232aa11 100644 --- ql/src/test/queries/clientpositive/vectorized_ptf.q +++ ql/src/test/queries/clientpositive/vectorized_ptf.q @@ -1,4 +1,5 @@ SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS @@ -42,7 +43,7 @@ insert into table part_orc select * from part_staging; --1. 
test1 -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -63,7 +64,7 @@ from noop(on part_orc -- 2. testJoinWithNoop -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j @@ -80,7 +81,7 @@ sort by j.p_name) -- 3. testOnlyPTF -explain extended +explain vectorization extended select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -93,7 +94,7 @@ order by p_name); -- 4. testPTFAlias -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -114,7 +115,7 @@ from noop(on part_orc -- 5. testPTFAndWhereWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -137,7 +138,7 @@ from noop(on part_orc -- 6. testSWQAndPTFAndGBy -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -162,7 +163,7 @@ group by p_mfgr, p_name, p_size -- 7. testJoin -explain extended +explain vectorization extended select abc.* from noop(on part_orc partition by p_mfgr @@ -177,7 +178,7 @@ order by p_name -- 8. testJoinRight -explain extended +explain vectorization extended select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr @@ -192,7 +193,7 @@ order by p_name -- 9. testNoopWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc @@ -207,7 +208,7 @@ order by p_name, p_size desc); -- 10. testNoopWithMapWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -226,7 +227,7 @@ from noopwithmap(on part_orc -- 11. testHavingWithWindowingPTFNoGBY -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -247,7 +248,7 @@ order by p_name) -- 12. testFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -268,7 +269,7 @@ order by p_mfgr, p_name -- 13. testPTFAndWindowingInSubQ -explain extended +explain vectorization extended select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -295,7 +296,7 @@ window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 -- 14. testPTFJoinWithWindowingWithCount -explain extended +explain vectorization extended select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -322,7 +323,7 @@ order by p_name -- 15. 
testDistinctInSelectWithPTF -explain extended +explain vectorization extended select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -341,7 +342,7 @@ round(sum(p_retailprice),2) as s from part_orc group by p_mfgr, p_brand; -explain extended +explain vectorization extended select p_mfgr, p_brand, s, round(sum(s) over w1,2) as s1 from noop(on mfgr_price_view @@ -375,7 +376,7 @@ dr INT, cud DOUBLE, fv1 INT); -explain extended +explain vectorization extended from noop(on part_orc partition by p_mfgr order by p_name) @@ -412,7 +413,7 @@ select * from part_5; -- 18. testMulti2OperatorsFunctionChainWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -447,7 +448,7 @@ from noop(on -- 19. testMulti3OperatorsFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -482,7 +483,7 @@ from noop(on -- 20. testMultiOperatorChainWithNoWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -514,7 +515,7 @@ from noop(on -- 21. testMultiOperatorChainEndsWithNoopMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -549,7 +550,7 @@ from noopwithmap(on -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -582,7 +583,7 @@ from noop(on -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, diff --git ql/src/test/queries/clientpositive/vectorized_shufflejoin.q ql/src/test/queries/clientpositive/vectorized_shufflejoin.q index f57d062..9227de0 100644 --- ql/src/test/queries/clientpositive/vectorized_shufflejoin.q +++ ql/src/test/queries/clientpositive/vectorized_shufflejoin.q @@ -2,10 +2,11 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; SET hive.auto.convert.join=false; +set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT; diff --git ql/src/test/queries/clientpositive/vectorized_string_funcs.q ql/src/test/queries/clientpositive/vectorized_string_funcs.q index d04a3c3..ee95c0b 100644 --- ql/src/test/queries/clientpositive/vectorized_string_funcs.q +++ ql/src/test/queries/clientpositive/vectorized_string_funcs.q @@ -1,9 +1,10 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled = true; +set hive.fetch.task.conversion=none; -- Test string functions in vectorized mode to verify end-to-end functionality. 
-explain +explain vectorization select substr(cstring1, 1, 2) ,substr(cstring1, 2) diff --git ql/src/test/queries/clientpositive/vectorized_timestamp.q ql/src/test/queries/clientpositive/vectorized_timestamp.q index 2784b7a..ceee2ee 100644 --- ql/src/test/queries/clientpositive/vectorized_timestamp.q +++ ql/src/test/queries/clientpositive/vectorized_timestamp.q @@ -6,23 +6,23 @@ CREATE TABLE test(ts TIMESTAMP) STORED AS ORC; INSERT INTO TABLE test VALUES ('0001-01-01 00:00:00.000000000'), ('9999-12-31 23:59:59.999999999'); SET hive.vectorized.execution.enabled = false; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT ts FROM test; SELECT ts FROM test; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; SET hive.vectorized.execution.enabled = true; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT ts FROM test; SELECT ts FROM test; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; diff --git ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q index aaf85fc..afbc18a 100644 --- ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q +++ ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q @@ -1,5 +1,6 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; +set hive.fetch.task.conversion=none; -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. -- Turning on vectorization has been temporarily moved after filling the test table -- due to bug HIVE-8197. @@ -23,7 +24,7 @@ INSERT INTO TABLE alltypesorc_wrong SELECT 'abcd' FROM alltypesorc LIMIT 1; INSERT INTO TABLE alltypesorc_wrong SELECT '2000:01:01 00-00-00' FROM alltypesorc LIMIT 1; INSERT INTO TABLE alltypesorc_wrong SELECT '0000-00-00 99:99:99' FROM alltypesorc LIMIT 1; -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -49,7 +50,7 @@ SELECT FROM alltypesorc_string ORDER BY c1; -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -75,7 +76,7 @@ SELECT FROM alltypesorc_string ORDER BY c1; -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -103,7 +104,7 @@ FROM alltypesorc_string ORDER BY c1; -- Wrong format. Should all be NULL. -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -129,7 +130,7 @@ SELECT FROM alltypesorc_wrong ORDER BY c1; -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), @@ -144,7 +145,7 @@ SELECT FROM alltypesorc_string; -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... 
-EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string; @@ -152,7 +153,7 @@ SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string; -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, diff --git ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q index 15964c9..e6e6d5d 100644 --- ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q +++ ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q @@ -1,8 +1,9 @@ set hive.mapred.mode=nonstrict; SET hive.vectorized.execution.enabled = true; SET hive.int.timestamp.conversion.in.seconds=false; +set hive.fetch.task.conversion=none; -explain +explain vectorization expression select -- to timestamp cast (ctinyint as timestamp) @@ -40,7 +41,7 @@ where cbigint % 250 = 0; SET hive.int.timestamp.conversion.in.seconds=true; -explain +explain vectorization expression select -- to timestamp cast (ctinyint as timestamp) diff --git ql/src/test/results/clientpositive/llap/acid_vectorization.q.out ql/src/test/results/clientpositive/llap/acid_vectorization.q.out index b9b4ce4..6171ce4 100644 --- ql/src/test/results/clientpositive/llap/acid_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/acid_vectorization.q.out @@ -58,3 +58,63 @@ POSTHOOK: Input: default@acid_vectorized -1070883071 0ruyd6Y50JpdGRf6HqD -1070551679 iUR3Q -1069736047 k17Am8uPHWk02cEf1jet +PREHOOK: query: CREATE TABLE acid_fast_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@acid_fast_vectorized +POSTHOOK: query: CREATE TABLE acid_fast_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acid_fast_vectorized +PREHOOK: query: insert into table acid_fast_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@acid_fast_vectorized +POSTHOOK: query: insert into table acid_fast_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@acid_fast_vectorized +POSTHOOK: Lineage: acid_fast_vectorized.a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: acid_fast_vectorized.b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +PREHOOK: query: insert into table acid_fast_vectorized values (1, 'bar') +PREHOOK: type: QUERY +PREHOOK: Output: default@acid_fast_vectorized +POSTHOOK: query: insert into table acid_fast_vectorized values (1, 'bar') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@acid_fast_vectorized +POSTHOOK: Lineage: acid_fast_vectorized.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: acid_fast_vectorized.b SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: update acid_fast_vectorized set b = 'foo' where b = 'bar' +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_fast_vectorized +PREHOOK: Output: default@acid_fast_vectorized +POSTHOOK: query: update acid_fast_vectorized set b = 'foo' where b = 'bar' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_fast_vectorized +POSTHOOK: Output: default@acid_fast_vectorized +PREHOOK: query: delete from acid_fast_vectorized where b = 'foo' +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_fast_vectorized +PREHOOK: Output: default@acid_fast_vectorized +POSTHOOK: query: delete from acid_fast_vectorized where b = 'foo' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_fast_vectorized +POSTHOOK: Output: default@acid_fast_vectorized +PREHOOK: query: select a, b from acid_fast_vectorized order by a, b +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_fast_vectorized +#### A masked pattern was here #### +POSTHOOK: query: select a, b from acid_fast_vectorized order by a, b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_fast_vectorized +#### A masked pattern was here #### +-1073279343 oj1YrV5Wa +-1073051226 A34p7oRr2WvUJNf +-1072910839 0iqrc5 +-1072081801 dPkN74F7 +-1072076362 2uLyD28144vklju213J1mr +-1071480828 aw724t8c5558x2xneC624 +-1071363017 Anj0oF +-1070883071 0ruyd6Y50JpdGRf6HqD +-1070551679 iUR3Q +-1069736047 k17Am8uPHWk02cEf1jet diff --git ql/src/test/results/clientpositive/llap/mergejoin.q.out ql/src/test/results/clientpositive/llap/mergejoin.q.out index 6114548..bb07f4a 100644 --- ql/src/test/results/clientpositive/llap/mergejoin.q.out +++ ql/src/test/results/clientpositive/llap/mergejoin.q.out @@ -1831,7 +1831,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map 6 Map Operator Tree: @@ -2054,7 +2054,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap @@ -2400,7 +2400,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap @@ -2524,7 +2524,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map 6 Map Operator Tree: diff --git ql/src/test/results/clientpositive/llap/parquet_ppd_decimal.q.out ql/src/test/results/clientpositive/llap/parquet_ppd_decimal.q.out index 5b1669e..1d3ac6e 100644 --- ql/src/test/results/clientpositive/llap/parquet_ppd_decimal.q.out +++ ql/src/test/results/clientpositive/llap/parquet_ppd_decimal.q.out @@ -18,13 +18,11 @@ POSTHOOK: Lineage: newtypestbl.c EXPRESSION [] POSTHOOK: Lineage: newtypestbl.d EXPRESSION [] POSTHOOK: Lineage: newtypestbl.da EXPRESSION [] POSTHOOK: Lineage: newtypestbl.v EXPRESSION [] -PREHOOK: query: -- decimal data types (EQUAL, NOT_EQUAL, LESS_THAN, 
LESS_THAN_EQUALS, IN, BETWEEN tests) -select * from newtypestbl where d=0.22 +PREHOOK: query: select * from newtypestbl where d=0.22 PREHOOK: type: QUERY PREHOOK: Input: default@newtypestbl #### A masked pattern was here #### -POSTHOOK: query: -- decimal data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests) -select * from newtypestbl where d=0.22 +POSTHOOK: query: select * from newtypestbl where d=0.22 POSTHOOK: type: QUERY POSTHOOK: Input: default@newtypestbl #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/llap/pcs.q.out ql/src/test/results/clientpositive/llap/pcs.q.out index b3844ee..21c2652 100644 --- ql/src/test/results/clientpositive/llap/pcs.q.out +++ ql/src/test/results/clientpositive/llap/pcs.q.out @@ -130,6 +130,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -151,6 +152,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -175,6 +177,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -196,6 +199,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -283,6 +287,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -304,6 +309,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -326,6 +332,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -347,6 +354,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -406,6 +414,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -427,6 +436,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -449,6 +459,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -470,6 +481,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -570,6 +582,7 @@ STAGE PLANS: 
properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -591,6 +604,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -615,6 +629,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -636,6 +651,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -683,6 +699,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -704,6 +721,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -728,6 +746,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -749,6 +768,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -848,6 +868,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -869,6 +890,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -891,6 +913,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -912,6 +935,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1209,6 +1233,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1230,6 +1255,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1306,6 +1332,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1327,6 +1354,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + 
column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1349,6 +1377,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1370,6 +1399,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1416,6 +1446,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1437,6 +1468,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1459,6 +1491,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1480,6 +1513,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1502,6 +1536,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1523,6 +1558,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1586,6 +1622,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1607,6 +1644,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1629,6 +1667,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1650,6 +1689,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1672,6 +1712,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1693,6 +1734,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string diff --git ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part.q.out ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part.q.out index 86c94f7..c6fc7b5 100644 --- 
ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part.q.out @@ -53,25 +53,73 @@ POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).b SIMPLE [(valu POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,a,b from part_add_int_permute_select PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,a,b from part_add_int_permute_select POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=2 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=2 width=101) - default@part_add_int_permute_select,part_add_int_permute_select,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","a","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_add_int_permute_select + Statistics: Num rows: 2 Data size: 202 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: insert_num (type: int), part (type: int), a (type: int), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 4, 1, 2] + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, a:int, b:string, c:int + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,a,b from part_add_int_permute_select PREHOOK: type: QUERY @@ -158,25 +206,73 @@ POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).c EXPRES POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).d SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col5, 
type:string, comment:), ] POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,a,b from part_add_int_string_permute_select PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,a,b from part_add_int_string_permute_select POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=2 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=2 width=145) - default@part_add_int_string_permute_select,part_add_int_string_permute_select,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","a","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_add_int_string_permute_select + Statistics: Num rows: 2 Data size: 290 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: insert_num (type: int), part (type: int), a (type: int), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1, 2] + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, a:int, b:string, c:int, d:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,a,b from part_add_int_string_permute_select PREHOOK: type: QUERY @@ -321,25 +417,73 @@ POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).c2 SIMPLE [ POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).c3 SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:double1, type:double, comment:null), ] POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num double1 double1 double1 _c4 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select 
insert_num,part,c1,c2,c3,b from part_change_string_group_double PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_string_group_double POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=5 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - TableScan [TS_0] (rows=5 width=426) - default@part_change_string_group_double,part_change_string_group_double,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","b"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_string_group_double + Statistics: Num rows: 5 Data size: 2130 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: double), c2 (type: double), c3 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1, 2, 3, 4] + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3, 4] + dataColumns: insert_num:int, c1:double, c2:double, c3:double, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,b from part_change_string_group_double PREHOOK: type: QUERY @@ -421,25 +565,73 @@ POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION( POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp PREHOOK: type: QUERY -POSTHOOK: query: 
explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - TableScan [TS_0] (rows=6 width=586) - default@part_change_date_group_string_group_date_timestamp,part_change_date_group_string_group_date_timestamp,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_date_group_string_group_date_timestamp + Statistics: Num rows: 6 Data size: 3521 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: char(50)), c3 (type: char(15)), c4 (type: varchar(50)), c5 (type: varchar(15)), c6 (type: string), c7 (type: char(50)), c8 (type: char(15)), c9 (type: varchar(50)), c10 (type: varchar(15)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + dataColumns: insert_num:int, c1:string, c2:char(50), c3:char(15), c4:varchar(50), c5:varchar(15), c6:string, c7:char(50), c8:char(15), c9:varchar(50), c10:varchar(15), b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp PREHOOK: type: QUERY @@ -590,25 +782,73 @@ POSTHOOK: Lineage: part_change_numeric_group_string_group_multi_ints_string_grou POSTHOOK: Lineage: part_change_numeric_group_string_group_multi_ints_string_group PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col10, 
type:string, comment:), ] POSTHOOK: Lineage: part_change_numeric_group_string_group_multi_ints_string_group PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 _col17 _col18 _col19 _col20 _col21 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22"] - TableScan [TS_0] (rows=6 width=483) - default@part_change_numeric_group_string_group_multi_ints_string_group,part_change_numeric_group_string_group_multi_ints_string_group,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","b"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_numeric_group_string_group_multi_ints_string_group + Statistics: Num rows: 6 Data size: 2903 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: string), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(50)), c8 (type: char(50)), c9 (type: char(5)), c10 (type: char(5)), c11 (type: char(5)), c12 (type: char(5)), c13 (type: varchar(50)), c14 (type: varchar(50)), c15 (type: varchar(50)), c16 (type: varchar(50)), c17 (type: varchar(5)), c18 (type: varchar(5)), c19 (type: varchar(5)), c20 (type: varchar(5)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 22, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 22 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:string, c5:char(50), c6:char(50), c7:char(50), c8:char(50), c9:char(5), c10:char(5), c11:char(5), c12:char(5), c13:varchar(50), c14:varchar(50), c15:varchar(50), c16:varchar(50), c17:varchar(5), c18:varchar(5), c19:varchar(5), c20:varchar(5), b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group PREHOOK: type: QUERY @@ -749,25 +989,73 @@ POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
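The rowBatchContext printed above makes the Select projection legible: with dataColumnCount: 22 and partitionColumnCount: 1, the partition column part is appended after the data columns as physical column 22, which is why projectedOutputColumns: [0, 22, 1, ...] realizes SELECT insert_num, part, c1, ... as a pure index remapping with no data copied. A minimal plain-Java sketch of that layout (illustrative only; not Hive's actual batch classes):

    public class RowBatchLayoutSketch {
      public static void main(String[] args) {
        int dataColumnCount = 22;      // insert_num, c1..c20, b
        int partitionColumnCount = 1;  // part:int

        // Partition columns are appended after the data columns,
        // so "part" sits at physical index 22.
        int partIndex = dataColumnCount;

        // SELECT insert_num, part, c1..c20, b: logical position -> physical index.
        int[] projection = new int[dataColumnCount + partitionColumnCount];
        projection[0] = 0;          // insert_num
        projection[1] = partIndex;  // part
        for (int logical = 2; logical < projection.length; logical++) {
          projection[logical] = logical - 1; // c1..c20, b keep their order
        }

        System.out.println("part is physical column " + partIndex);
        System.out.println("projection: [" + projection[0] + ", "
            + projection[1] + ", " + projection[2] + ", ...]");
      }
    }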
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_numeric_group_string_group_floating_string_group + Statistics: Num rows: 6 Data size: 4540 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: char(50)), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(7)), c8 (type: char(7)), c9 (type: char(7)), c10 (type: varchar(50)), c11 (type: varchar(50)), c12 (type: varchar(50)), c13 (type: varchar(7)), c14 (type: varchar(7)), c15 (type: varchar(7)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 17, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 17 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:char(50), c5:char(50), c6:char(50), c7:char(7), c8:char(7), c9:char(7), c10:varchar(50), c11:varchar(50), c12:varchar(50), c13:varchar(7), c14:varchar(7), c15:varchar(7), b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - TableScan [TS_0] (rows=6 width=756) - default@part_change_numeric_group_string_group_floating_string_group,part_change_numeric_group_string_group_floating_string_group,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY @@ -894,25 +1182,73 @@ POSTHOOK: Lineage: part_change_string_group_string_group_string PARTITION(part=1 POSTHOOK: 
Lineage: part_change_string_group_string_group_string PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_string_group_string_group_string PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_string_group_string_group_string + Statistics: Num rows: 6 Data size: 6682 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: char(50)), c2 (type: char(9)), c3 (type: varchar(50)), c4 (type: char(9)), c5 (type: varchar(50)), c6 (type: varchar(9)), c7 (type: string), c8 (type: char(50)), c9 (type: char(9)), c10 (type: string), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + dataColumns: insert_num:int, c1:char(50), c2:char(9), c3:varchar(50), c4:char(9), c5:varchar(50), c6:varchar(9), c7:string, c8:char(50), c9:char(9), c10:string, b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - TableScan [TS_0] (rows=6 width=1113) - 
default@part_change_string_group_string_group_string,part_change_string_group_string_group_string,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string PREHOOK: type: QUERY @@ -1067,25 +1403,73 @@ POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint P POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 _col17 _col18 _col19 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] - TableScan [TS_0] (rows=6 width=236) - default@part_change_lower_to_higher_numeric_group_tinyint_to_bigint,part_change_lower_to_higher_numeric_group_tinyint_to_bigint,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_lower_to_higher_numeric_group_tinyint_to_bigint + Statistics: Num rows: 6 Data size: 1419 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: smallint), c2 (type: int), c3 (type: bigint), c4 (type: decimal(38,18)), c5 (type: float), c6 (type: double), c7 (type: int), c8 (type: bigint), c9 (type: decimal(38,18)), c10 (type: float), c11 (type: double), c12 (type: bigint), c13 (type: decimal(38,18)), c14 (type: float), c15 (type: double), c16 (type: decimal(38,18)), c17 (type: float), c18 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select 
Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 20, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 20 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + dataColumns: insert_num:int, c1:smallint, c2:int, c3:bigint, c4:decimal(38,18), c5:float, c6:double, c7:int, c8:bigint, c9:decimal(38,18), c10:float, c11:double, c12:bigint, c13:decimal(38,18), c14:float, c15:double, c16:decimal(38,18), c17:float, c18:double, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint PREHOOK: type: QUERY @@ -1182,25 +1566,73 @@ POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PA POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PARTITION(part=1).c3 EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col4, type:string, comment:), ] POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
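The tinyint_to_bigint plan above shows the "lower to higher" reads this file exercises: data written under the narrower original types comes back widened to the evolved column types. A plain-Java sketch of the promotions involved (integer widening is exact; the decimal-to-floating steps covered elsewhere in this file can round):

    public class NumericWideningSketch {
      public static void main(String[] args) {
        byte writtenTinyint = 100;         // value stored under the old tinyint schema
        short asSmallint = writtenTinyint; // read as smallint: exact
        int asInt = writtenTinyint;        // read as int: exact
        long asBigint = writtenTinyint;    // read as bigint: exact
        double asDouble = writtenTinyint;  // read as double: exact for small integers
        System.out.println(asSmallint + " " + asInt + " " + asBigint + " " + asDouble);
      }
    }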
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_lower_to_higher_numeric_group_decimal_to_float + Statistics: Num rows: 6 Data size: 1523 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: float), c2 (type: double), c3 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1, 2, 3, 4] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3, 4] + dataColumns: insert_num:int, c1:float, c2:double, c3:double, b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - TableScan [TS_0] (rows=6 width=253) - default@part_change_lower_to_higher_numeric_group_decimal_to_float,part_change_lower_to_higher_numeric_group_decimal_to_float,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out index 3da9284..d0cafaa 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out @@ -131,25 +131,55 @@ POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).b SIMPL POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).insert_num SIMPLE [(complex_struct1_c_txt)complex_struct1_c_txt.FieldSchema(name:insert_num, type:int, comment:null), ] POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).s1 SIMPLE [(complex_struct1_c_txt)complex_struct1_c_txt.FieldSchema(name:s1, type:struct, comment:null), ] complex_struct1_c_txt.insert_num complex_struct1_c_txt.s1 complex_struct1_c_txt.b -PREHOOK: query: explain +PREHOOK: query: explain vectorization 
detail select insert_num,part,s1,b from part_change_various_various_struct1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,s1,b from part_change_various_various_struct1 POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 llap - File Output Operator [FS_2] - Select Operator [SEL_1] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=6 width=789) - default@part_change_various_various_struct1,part_change_various_various_struct1,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","s1","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_struct1 + Statistics: Num rows: 6 Data size: 4734 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: insert_num (type: int), part (type: int), s1 (type: struct), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s1] not supported + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,s1,b from part_change_various_various_struct1 PREHOOK: type: QUERY @@ -383,25 +413,55 @@ POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).b SIMPLE [ POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).insert_num SIMPLE [(complex_struct2_d_txt)complex_struct2_d_txt.FieldSchema(name:insert_num, type:int, comment:null), ] POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).s2 SIMPLE [(complex_struct2_d_txt)complex_struct2_d_txt.FieldSchema(name:s2, type:struct, comment:null), ] complex_struct2_d_txt.insert_num complex_struct2_d_txt.b complex_struct2_d_txt.s2 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,b,s2 from part_add_various_various_struct2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,b,s2 from part_add_various_various_struct2 POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
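The struct1 plan above is the interesting negative case: the map is still planned under LLAP, but notVectorizedReason records that a struct-typed select expression forces row mode, so Execution mode reads llap rather than vectorized, llap. A hedged sketch of that validation step (the helper below is illustrative, not Hive's actual API):

    import java.util.List;

    public class ComplexTypeFallbackSketch {
      // Returns a reason string when a projected type blocks vectorization,
      // or null when the SELECT can run vectorized.
      static String notVectorizedReason(List<String> columnTypes) {
        for (String t : columnTypes) {
          if (t.startsWith("struct") || t.startsWith("array")
              || t.startsWith("map") || t.startsWith("uniontype")) {
            return "Data type " + t + " of select expression not supported";
          }
        }
        return null;
      }

      public static void main(String[] args) {
        System.out.println(notVectorizedReason(
            List.of("int", "int", "struct<c1:string,c2:int>", "string")));
      }
    }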
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 llap - File Output Operator [FS_2] - Select Operator [SEL_1] (rows=8 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=8 width=614) - default@part_add_various_various_struct2,part_add_various_various_struct2,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","b","s2"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_add_various_various_struct2 + Statistics: Num rows: 8 Data size: 4912 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s2] not supported + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,b,s2 from part_add_various_various_struct2 PREHOOK: type: QUERY @@ -563,25 +623,55 @@ POSTHOOK: Lineage: part_add_to_various_various_struct4 PARTITION(part=1).b SIMPL POSTHOOK: Lineage: part_add_to_various_various_struct4 PARTITION(part=1).insert_num SIMPLE [(complex_struct4_c_txt)complex_struct4_c_txt.FieldSchema(name:insert_num, type:int, comment:null), ] POSTHOOK: Lineage: part_add_to_various_various_struct4 PARTITION(part=1).s3 SIMPLE [(complex_struct4_c_txt)complex_struct4_c_txt.FieldSchema(name:s3, type:struct, comment:null), ] complex_struct4_c_txt.insert_num complex_struct4_c_txt.b complex_struct4_c_txt.s3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,b,s3 from part_add_to_various_various_struct4 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,b,s3 from part_add_to_various_various_struct4 POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
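Note the two separate levels these outputs report: PLAN VECTORIZATION: enabled: true reflects only the session flag, while each map vertex is validated on its own, which is how a plan can be enabled overall yet print vectorized: false with a per-vertex reason, as for s2 above. A small sketch of that decision split (the class and its wiring are illustrative, not Hive code):

    public class TwoLevelDecisionSketch {
      public static void main(String[] args) {
        // Plan level: the single condition reported as enabledConditionsMet.
        boolean planEnabled = true; // hive.vectorized.execution.enabled IS true

        // Vertex level: pass any argument to simulate a vertex that validates cleanly.
        String vertexReason = args.length > 0
            ? null
            : "Data type struct of Column[s2] not supported";
        boolean vertexVectorized = planEnabled && vertexReason == null;

        System.out.println("PLAN VECTORIZATION enabled: " + planEnabled);
        System.out.println("vertex vectorized: " + vertexVectorized
            + (vertexVectorized ? "" : " (" + vertexReason + ")"));
      }
    }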
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_add_to_various_various_struct4 + Statistics: Num rows: 4 Data size: 1172 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: insert_num (type: int), part (type: int), b (type: string), s3 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s3] not supported + vectorized: false -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 llap - File Output Operator [FS_2] - Select Operator [SEL_1] (rows=4 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=4 width=293) - default@part_add_to_various_various_struct4,part_add_to_various_various_struct4,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","b","s3"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,b,s3 from part_add_to_various_various_struct4 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_primitive.q.out ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_primitive.q.out index f155b00..3a5232a 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_primitive.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_primitive.q.out @@ -242,25 +242,73 @@ POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part= POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part=1).c9 SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:boolean1, type:boolean, comment:null), ] POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 int1 int1 int1 int1 int1 int1 int1 int1 int1 int1 int1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 _c54 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from 
part_change_various_various_boolean_to_bigint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=10 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47","_col48","_col49","_col50","_col51","_col52","_col53","_col54","_col55"] - TableScan [TS_0] (rows=10 width=1168) - default@part_change_various_various_boolean_to_bigint,part_change_various_various_boolean_to_bigint,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","c21","c22","c23","c24","c25","c26","c27","c28","c29","c30","c31","c32","c33","c34","c35","c36","c37","c38","c39","c40","c41","c42","c43","c44","c45","c46","c47","c48","c49","c50","c51","c52","c53","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_boolean_to_bigint + Statistics: Num rows: 10 Data size: 11688 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: boolean), c2 (type: boolean), c3 (type: boolean), c4 (type: boolean), c5 (type: boolean), c6 (type: boolean), c7 (type: boolean), c8 (type: boolean), c9 (type: boolean), c10 (type: tinyint), c11 (type: tinyint), c12 (type: tinyint), c13 (type: tinyint), c14 (type: tinyint), c15 (type: tinyint), c16 (type: tinyint), c17 (type: tinyint), c18 (type: tinyint), c19 (type: tinyint), c20 (type: tinyint), c21 (type: smallint), c22 (type: smallint), c23 (type: smallint), c24 (type: smallint), c25 (type: smallint), c26 (type: smallint), c27 (type: smallint), c28 (type: smallint), c29 (type: smallint), c30 (type: smallint), c31 (type: smallint), c32 (type: int), c33 (type: int), c34 (type: int), c35 (type: int), c36 (type: int), c37 (type: int), c38 (type: int), c39 (type: int), c40 (type: int), c41 (type: int), c42 (type: int), c43 (type: bigint), c44 (type: bigint), c45 (type: bigint), c46 (type: bigint), c47 (type: bigint), c48 (type: bigint), c49 (type: bigint), c50 (type: bigint), c51 (type: bigint), c52 (type: bigint), c53 (type: bigint), b (type: string) + outputColumnNames: _col0, 
_col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 55, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54] + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 55 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54] + dataColumns: insert_num:int, c1:boolean, c2:boolean, c3:boolean, c4:boolean, c5:boolean, c6:boolean, c7:boolean, c8:boolean, c9:boolean, c10:tinyint, c11:tinyint, c12:tinyint, c13:tinyint, c14:tinyint, c15:tinyint, c16:tinyint, c17:tinyint, c18:tinyint, c19:tinyint, c20:tinyint, c21:smallint, c22:smallint, c23:smallint, c24:smallint, c25:smallint, c26:smallint, c27:smallint, c28:smallint, c29:smallint, c30:smallint, c31:smallint, c32:int, c33:int, c34:int, c35:int, c36:int, c37:int, c38:int, c39:int, c40:int, c41:int, c42:int, c43:bigint, c44:bigint, c45:bigint, c46:bigint, c47:bigint, c48:bigint, c49:bigint, c50:bigint, c51:bigint, c52:bigint, c53:bigint, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint PREHOOK: type: QUERY @@ -443,25 +491,73 @@ POSTHOOK: Lineage: part_change_various_various_decimal_to_double PARTITION(part= POSTHOOK: Lineage: part_change_various_various_decimal_to_double PARTITION(part=1).c9 SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:decimal1, type:decimal(38,18), comment:null), ] POSTHOOK: Lineage: part_change_various_various_decimal_to_double PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 
decimal1 decimal1 decimal1 float1 float1 float1 float1 float1 float1 float1 float1 float1 float1 float1 double1 double1 double1 double1 double1 double1 double1 double1 double1 double1 double1 _c34 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35"] - TableScan [TS_0] (rows=6 width=1382) - default@part_change_various_various_decimal_to_double,part_change_various_various_decimal_to_double,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","c21","c22","c23","c24","c25","c26","c27","c28","c29","c30","c31","c32","c33","b"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_decimal_to_double + Statistics: Num rows: 6 Data size: 8295 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: decimal(38,18)), c2 (type: decimal(38,18)), c3 (type: decimal(38,18)), c4 (type: decimal(38,18)), c5 (type: decimal(38,18)), c6 (type: decimal(38,18)), c7 (type: decimal(38,18)), c8 (type: decimal(38,18)), c9 (type: decimal(38,18)), c10 (type: decimal(38,18)), c11 (type: decimal(38,18)), c12 (type: float), c13 (type: float), c14 (type: float), c15 (type: float), c16 (type: float), c17 (type: float), c18 (type: float), c19 (type: float), c20 (type: float), c21 (type: float), c22 (type: float), c23 (type: double), c24 (type: double), c25 (type: double), c26 (type: double), c27 (type: double), c28 (type: double), c29 (type: double), c30 (type: double), c31 (type: double), c32 (type: double), c33 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 35, 1, 
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 35 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34] + dataColumns: insert_num:int, c1:decimal(38,18), c2:decimal(38,18), c3:decimal(38,18), c4:decimal(38,18), c5:decimal(38,18), c6:decimal(38,18), c7:decimal(38,18), c8:decimal(38,18), c9:decimal(38,18), c10:decimal(38,18), c11:decimal(38,18), c12:float, c13:float, c14:float, c15:float, c16:float, c17:float, c18:float, c19:float, c20:float, c21:float, c22:float, c23:double, c24:double, c25:double, c26:double, c27:double, c28:double, c29:double, c30:double, c31:double, c32:double, c33:double, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double PREHOOK: type: QUERY @@ -566,25 +662,73 @@ POSTHOOK: Lineage: part_change_various_various_timestamp PARTITION(part=1).c8 SI POSTHOOK: Lineage: part_change_various_various_timestamp PARTITION(part=1).c9 SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:timestamp1, type:timestamp, comment:null), ] POSTHOOK: Lineage: part_change_various_various_timestamp PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 _c13 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
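The decimal_to_double plan above reads decimal(38,18) data through float and double columns. Magnitudes survive the promotion but precision need not: a double keeps roughly 15-17 significant decimal digits against the 38 the decimal can carry. A plain-Java illustration:

    import java.math.BigDecimal;

    public class DecimalToDoubleSketch {
      public static void main(String[] args) {
        // 38 significant digits, scale 18 -- the decimal(38,18) shape above.
        BigDecimal written = new BigDecimal("12345678901234567890.123456789012345678");
        double read = written.doubleValue(); // rounds to the nearest double
        System.out.println(written + " -> " + read);
      }
    }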
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - TableScan [TS_0] (rows=6 width=494) - default@part_change_various_various_timestamp,part_change_various_various_timestamp,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_timestamp + Statistics: Num rows: 6 Data size: 2965 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: timestamp), c2 (type: timestamp), c3 (type: timestamp), c4 (type: timestamp), c5 (type: timestamp), c6 (type: timestamp), c7 (type: timestamp), c8 (type: timestamp), c9 (type: timestamp), c10 (type: timestamp), c11 (type: timestamp), c12 (type: timestamp), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 14 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + dataColumns: insert_num:int, c1:timestamp, c2:timestamp, c3:timestamp, c4:timestamp, c5:timestamp, c6:timestamp, c7:timestamp, c8:timestamp, c9:timestamp, c10:timestamp, c11:timestamp, c12:timestamp, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp PREHOOK: type: QUERY @@ -673,25 +817,73 @@ POSTHOOK: Lineage: part_change_various_various_date PARTITION(part=1).c3 SIMPLE POSTHOOK: Lineage: part_change_various_various_date PARTITION(part=1).c4 SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:date1, type:date, comment:null), ] POSTHOOK: Lineage: part_change_various_various_date PARTITION(part=1).insert_num SIMPLE 
[(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num date1 date1 date1 date1 _c5 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_date + Statistics: Num rows: 6 Data size: 2444 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: date), c2 (type: date), c3 (type: date), c4 (type: date), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 6, 1, 2, 3, 4, 5] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 6 + includeColumns: [0, 1, 2, 3, 4, 5] + dataColumns: insert_num:int, c1:date, c2:date, c3:date, c4:date, b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - TableScan [TS_0] (rows=6 width=407) - default@part_change_various_various_date,part_change_various_various_date,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date PREHOOK: type: QUERY @@ -861,25 +1053,73 @@ POSTHOOK: Lineage: part_change_same_type_different_params PARTITION(part=2).c5 S POSTHOOK: Lineage: part_change_same_type_different_params PARTITION(part=2).c6 SIMPLE [(same_type1_c_txt)same_type1_c_txt.FieldSchema(name:c6, type:decimal(25,15), comment:null), ] POSTHOOK: Lineage: part_change_same_type_different_params PARTITION(part=2).insert_num SIMPLE [(same_type1_c_txt)same_type1_c_txt.FieldSchema(name:insert_num, type:int, comment:null), ] same_type1_c_txt.insert_num same_type1_c_txt.c1 same_type1_c_txt.c2 
same_type1_c_txt.c3 same_type1_c_txt.c4 same_type1_c_txt.c5 same_type1_c_txt.c6 same_type1_c_txt.b -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_same_type_different_params + Statistics: Num rows: 13 Data size: 8736 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: char(8)), c2 (type: char(32)), c3 (type: varchar(15)), c4 (type: varchar(18)), c5 (type: decimal(10,2)), c6 (type: decimal(25,15)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 8, 1, 2, 3, 4, 5, 6, 7] + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7] + dataColumns: insert_num:int, c1:char(8), c2:char(32), c3:varchar(15), c4:varchar(18), c5:decimal(10,2), c6:decimal(25,15), b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=13 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - TableScan [TS_0] (rows=13 width=672) - default@part_change_same_type_different_params,part_change_same_type_different_params,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_table.q.out ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_table.q.out index ecc4ee6..2cd6005 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_table.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_table.q.out @@ -55,25 
+55,72 @@ POSTHOOK: Lineage: table_add_int_permute_select.b SIMPLE [(values__tmp__table__1 POSTHOOK: Lineage: table_add_int_permute_select.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] POSTHOOK: Lineage: table_add_int_permute_select.insert_num EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,a,b from table_add_int_permute_select PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,a,b from table_add_int_permute_select POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=99) - Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=6 width=99) - default@table_add_int_permute_select,table_add_int_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: table_add_int_permute_select + Statistics: Num rows: 6 Data size: 595 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Select Operator + expressions: insert_num (type: int), a (type: int), b (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 6 Data size: 595 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 595 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, a:int, b:string, c:int + partitionColumnCount: 0 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,a,b from table_add_int_permute_select PREHOOK: type: QUERY @@ -168,25 +215,72 @@ POSTHOOK: Lineage: table_add_int_string_permute_select.c EXPRESSION [(values__tm POSTHOOK: Lineage: table_add_int_string_permute_select.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ] POSTHOOK: Lineage: table_add_int_string_permute_select.insert_num EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 -PREHOOK: query: explain +PREHOOK: query: explain 
vectorization detail select insert_num,a,b from table_add_int_string_permute_select PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,a,b from table_add_int_string_permute_select POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=114) - Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=6 width=114) - default@table_add_int_string_permute_select,table_add_int_string_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: table_add_int_string_permute_select + Statistics: Num rows: 6 Data size: 685 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: insert_num (type: int), a (type: int), b (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 6 Data size: 685 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 685 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, a:int, b:string, c:int, d:string + partitionColumnCount: 0 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,a,b from table_add_int_string_permute_select PREHOOK: type: QUERY @@ -343,25 +437,72 @@ POSTHOOK: Lineage: table_change_string_group_double.c2 EXPRESSION [(values__tmp_ POSTHOOK: Lineage: table_change_string_group_double.c3 EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] POSTHOOK: Lineage: table_change_string_group_double.insert_num EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,c1,c2,c3,b from table_change_string_group_double PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,c1,c2,c3,b from table_change_string_group_double POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
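The rowBatchContext above also records pruning: the query selects only insert_num, a, b, so includeColumns: [0, 1, 2] excludes c and d (the columns this schema-evolution test adds later), and they are never materialized into the batch. A plain-Java sketch of that mapping (not Hive's reader):

    public class IncludeColumnsSketch {
      public static void main(String[] args) {
        String[] dataColumns = { "insert_num", "a", "b", "c", "d" };
        int[] includeColumns = { 0, 1, 2 }; // only columns the query references
        boolean[] read = new boolean[dataColumns.length];
        for (int c : includeColumns) {
          read[c] = true;
        }
        for (int i = 0; i < dataColumns.length; i++) {
          System.out.println(dataColumns[i] + (read[i] ? ": read" : ": skipped"));
        }
      }
    }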
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=370) - Output:["_col0","_col1","_col2","_col3","_col4"] - TableScan [TS_0] (rows=6 width=370) - default@table_change_string_group_double,table_change_string_group_double,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: table_change_string_group_double + Statistics: Num rows: 6 Data size: 2225 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: insert_num (type: int), c1 (type: double), c2 (type: double), c3 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Statistics: Num rows: 6 Data size: 2225 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 2225 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3, 4] + dataColumns: insert_num:int, c1:double, c2:double, c3:double, b:string + partitionColumnCount: 0 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,c1,c2,c3,b from table_change_string_group_double PREHOOK: type: QUERY @@ -587,25 +728,72 @@ POSTHOOK: Lineage: table_change_numeric_group_string_group_multi_ints_string_gro POSTHOOK: Lineage: table_change_numeric_group_string_group_multi_ints_string_group.c9 EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: table_change_numeric_group_string_group_multi_ints_string_group.insert_num EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 _col17 _col18 _col19 _col20 _col21 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group POSTHOOK: type: QUERY Explain -Plan 
optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: table_change_numeric_group_string_group_multi_ints_string_group + Statistics: Num rows: 6 Data size: 2879 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + Select Operator + expressions: insert_num (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: string), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(50)), c8 (type: char(50)), c9 (type: char(5)), c10 (type: char(5)), c11 (type: char(5)), c12 (type: char(5)), c13 (type: varchar(50)), c14 (type: varchar(50)), c15 (type: varchar(50)), c16 (type: varchar(50)), c17 (type: varchar(5)), c18 (type: varchar(5)), c19 (type: varchar(5)), c20 (type: varchar(5)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + Statistics: Num rows: 6 Data size: 2879 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 2879 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 22 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:string, c5:char(50), c6:char(50), c7:char(50), c8:char(50), c9:char(5), c10:char(5), c11:char(5), c12:char(5), c13:varchar(50), c14:varchar(50), c15:varchar(50), c16:varchar(50), c17:varchar(5), c18:varchar(5), c19:varchar(5), c20:varchar(5), b:string + partitionColumnCount: 0 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=479) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21"] - TableScan [TS_0] (rows=6 width=479) - default@table_change_numeric_group_string_group_multi_ints_string_group,table_change_numeric_group_string_group_multi_ints_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: 
+ ListSink PREHOOK: query: select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group PREHOOK: type: QUERY @@ -742,25 +930,72 @@ POSTHOOK: Lineage: table_change_numeric_group_string_group_floating_string_group POSTHOOK: Lineage: table_change_numeric_group_string_group_floating_string_group.c9 EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: table_change_numeric_group_string_group_floating_string_group.insert_num EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: table_change_numeric_group_string_group_floating_string_group + Statistics: Num rows: 6 Data size: 4516 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Select Operator + expressions: insert_num (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: char(50)), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(7)), c8 (type: char(7)), c9 (type: char(7)), c10 (type: varchar(50)), c11 (type: varchar(50)), c12 (type: varchar(50)), c13 (type: varchar(7)), c14 (type: varchar(7)), c15 (type: varchar(7)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Statistics: Num rows: 6 Data size: 4516 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 4516 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 17 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 
16] + dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:char(50), c5:char(50), c6:char(50), c7:char(7), c8:char(7), c9:char(7), c10:varchar(50), c11:varchar(50), c12:varchar(50), c13:varchar(7), c14:varchar(7), c15:varchar(7), b:string + partitionColumnCount: 0 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=752) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] - TableScan [TS_0] (rows=6 width=752) - default@table_change_numeric_group_string_group_floating_string_group,table_change_numeric_group_string_group_floating_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out index 1fe9a13..45635ee 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out @@ -53,25 +53,73 @@ POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).b SIMPLE [(valu POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,a,b from part_add_int_permute_select PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,a,b from part_add_int_permute_select POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=2 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=2 width=16) - default@part_add_int_permute_select,part_add_int_permute_select,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","a","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_add_int_permute_select + Statistics: Num rows: 2 Data size: 33 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: insert_num (type: int), part (type: int), a (type: int), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 4, 1, 2] + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, a:int, b:string, c:int + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,a,b from part_add_int_permute_select PREHOOK: type: QUERY @@ -158,25 +206,73 @@ POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).c EXPRES POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).d SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col5, type:string, comment:), ] POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,a,b from part_add_int_string_permute_select PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,a,b from part_add_int_string_permute_select POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=2 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=2 width=19) - default@part_add_int_string_permute_select,part_add_int_string_permute_select,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","a","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_add_int_string_permute_select + Statistics: Num rows: 2 Data size: 38 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: insert_num (type: int), part (type: int), a (type: int), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1, 2] + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, a:int, b:string, c:int, d:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,a,b from part_add_int_string_permute_select PREHOOK: type: QUERY @@ -321,25 +417,73 @@ POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).c2 SIMPLE [ POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).c3 SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:double1, type:double, comment:null), ] POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num double1 double1 double1 _c4 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_string_group_double PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_string_group_double POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=5 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - TableScan [TS_0] (rows=5 width=94) - default@part_change_string_group_double,part_change_string_group_double,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","b"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_string_group_double + Statistics: Num rows: 5 Data size: 471 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: double), c2 (type: double), c3 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1, 2, 3, 4] + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3, 4] + dataColumns: insert_num:int, c1:double, c2:double, c3:double, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,b from part_change_string_group_double PREHOOK: type: QUERY @@ -421,25 +565,73 @@ POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION( POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
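For the text-format partitioned tables in this file, the plans report a different enabling condition than the ORC cases: hive.vectorized.use.vector.serde.deserialize IS true, with TextInputFormat as the input format. A hedged sketch of the corresponding q-file setup (property names taken from the conditions printed above; explicit values are assumed):

set hive.vectorized.execution.enabled=true;
set hive.vectorized.use.vector.serde.deserialize=true;

explain vectorization detail
select insert_num,part,c1,c2,c3,b from part_change_string_group_double;

Note that rowBatchContext lists the partition column separately (partitionColumnCount: 1, partitionColumns: part:int) from the table's data columns.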
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - TableScan [TS_0] (rows=6 width=172) - default@part_change_date_group_string_group_date_timestamp,part_change_date_group_string_group_date_timestamp,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_date_group_string_group_date_timestamp + Statistics: Num rows: 6 Data size: 1032 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: char(50)), c3 (type: char(15)), c4 (type: varchar(50)), c5 (type: varchar(15)), c6 (type: string), c7 (type: char(50)), c8 (type: char(15)), c9 (type: varchar(50)), c10 (type: varchar(15)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + dataColumns: insert_num:int, c1:string, c2:char(50), c3:char(15), c4:varchar(50), c5:varchar(15), c6:string, c7:char(50), c8:char(15), c9:varchar(50), c10:varchar(15), b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp PREHOOK: type: QUERY @@ -590,25 +782,73 @@ POSTHOOK: Lineage: part_change_numeric_group_string_group_multi_ints_string_grou POSTHOOK: Lineage: part_change_numeric_group_string_group_multi_ints_string_group PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_numeric_group_string_group_multi_ints_string_group PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 _col17 _col18 _col19 _col20 _col21 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22"] - TableScan [TS_0] (rows=6 width=182) - default@part_change_numeric_group_string_group_multi_ints_string_group,part_change_numeric_group_string_group_multi_ints_string_group,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","b"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_numeric_group_string_group_multi_ints_string_group + Statistics: Num rows: 6 Data size: 1094 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: string), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(50)), c8 (type: char(50)), c9 (type: char(5)), c10 (type: char(5)), c11 (type: char(5)), c12 (type: char(5)), c13 (type: varchar(50)), c14 (type: varchar(50)), c15 (type: varchar(50)), c16 (type: varchar(50)), c17 (type: varchar(5)), c18 (type: varchar(5)), c19 (type: varchar(5)), c20 (type: varchar(5)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 22, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 22 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:string, c5:char(50), c6:char(50), c7:char(50), c8:char(50), c9:char(5), c10:char(5), c11:char(5), c12:char(5), c13:varchar(50), c14:varchar(50), c15:varchar(50), c16:varchar(50), c17:varchar(5), c18:varchar(5), c19:varchar(5), c20:varchar(5), b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group PREHOOK: type: QUERY @@ -749,25 +989,73 @@ POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
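In each rowBatchContext above, dataColumns lists the table's full schema while includeColumns tracks only the columns the query actually reads, so projecting fewer columns should shrink that list. A small illustrative query against the preceding table (the predicted output is my assumption, not taken from the golden file):

explain vectorization detail
select insert_num,part,c1 from part_change_numeric_group_string_group_multi_ints_string_group;

-- expected under the same settings: includeColumns: [0, 1], with part still
-- reported as the separate partition column, instead of the full list above.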
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_numeric_group_string_group_floating_string_group + Statistics: Num rows: 6 Data size: 1521 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: char(50)), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(7)), c8 (type: char(7)), c9 (type: char(7)), c10 (type: varchar(50)), c11 (type: varchar(50)), c12 (type: varchar(50)), c13 (type: varchar(7)), c14 (type: varchar(7)), c15 (type: varchar(7)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 17, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 17 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:char(50), c5:char(50), c6:char(50), c7:char(7), c8:char(7), c9:char(7), c10:varchar(50), c11:varchar(50), c12:varchar(50), c13:varchar(7), c14:varchar(7), c15:varchar(7), b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - TableScan [TS_0] (rows=6 width=253) - default@part_change_numeric_group_string_group_floating_string_group,part_change_numeric_group_string_group_floating_string_group,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY @@ -894,25 +1182,73 @@ POSTHOOK: Lineage: part_change_string_group_string_group_string PARTITION(part=1 POSTHOOK: Lineage: 
part_change_string_group_string_group_string PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_string_group_string_group_string PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_string_group_string_group_string + Statistics: Num rows: 6 Data size: 1205 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: char(50)), c2 (type: char(9)), c3 (type: varchar(50)), c4 (type: char(9)), c5 (type: varchar(50)), c6 (type: varchar(9)), c7 (type: string), c8 (type: char(50)), c9 (type: char(9)), c10 (type: string), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + dataColumns: insert_num:int, c1:char(50), c2:char(9), c3:varchar(50), c4:char(9), c5:varchar(50), c6:varchar(9), c7:string, c8:char(50), c9:char(9), c10:string, b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - TableScan [TS_0] (rows=6 width=200) - 
default@part_change_string_group_string_group_string,part_change_string_group_string_group_string,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string PREHOOK: type: QUERY @@ -1067,25 +1403,73 @@ POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint P POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 _col17 _col18 _col19 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] - TableScan [TS_0] (rows=6 width=143) - default@part_change_lower_to_higher_numeric_group_tinyint_to_bigint,part_change_lower_to_higher_numeric_group_tinyint_to_bigint,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_lower_to_higher_numeric_group_tinyint_to_bigint + Statistics: Num rows: 6 Data size: 860 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: smallint), c2 (type: int), c3 (type: bigint), c4 (type: decimal(38,18)), c5 (type: float), c6 (type: double), c7 (type: int), c8 (type: bigint), c9 (type: decimal(38,18)), c10 (type: float), c11 (type: double), c12 (type: bigint), c13 (type: decimal(38,18)), c14 (type: float), c15 (type: double), c16 (type: decimal(38,18)), c17 (type: float), c18 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select 
Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 20, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 20 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + dataColumns: insert_num:int, c1:smallint, c2:int, c3:bigint, c4:decimal(38,18), c5:float, c6:double, c7:int, c8:bigint, c9:decimal(38,18), c10:float, c11:double, c12:bigint, c13:decimal(38,18), c14:float, c15:double, c16:decimal(38,18), c17:float, c18:double, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint PREHOOK: type: QUERY @@ -1182,25 +1566,73 @@ POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PA POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PARTITION(part=1).c3 EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col4, type:string, comment:), ] POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_lower_to_higher_numeric_group_decimal_to_float + Statistics: Num rows: 6 Data size: 428 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: float), c2 (type: double), c3 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1, 2, 3, 4] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3, 4] + dataColumns: insert_num:int, c1:float, c2:double, c3:double, b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - TableScan [TS_0] (rows=6 width=71) - default@part_change_lower_to_higher_numeric_group_decimal_to_float,part_change_lower_to_higher_numeric_group_decimal_to_float,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out index be42c05..97270fc 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out @@ -131,25 +131,55 @@ POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).b SIMPL POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).insert_num SIMPLE [(complex_struct1_c_txt)complex_struct1_c_txt.FieldSchema(name:insert_num, type:int, comment:null), ] POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).s1 SIMPLE [(complex_struct1_c_txt)complex_struct1_c_txt.FieldSchema(name:s1, type:struct, comment:null), ] complex_struct1_c_txt.insert_num complex_struct1_c_txt.s1 complex_struct1_c_txt.b -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail 
select insert_num,part,s1,b from part_change_various_various_struct1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,s1,b from part_change_various_various_struct1 POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 llap - File Output Operator [FS_2] - Select Operator [SEL_1] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=6 width=155) - default@part_change_various_various_struct1,part_change_various_various_struct1,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","s1","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_struct1 + Statistics: Num rows: 6 Data size: 931 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: insert_num (type: int), part (type: int), s1 (type: struct), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s1] not supported + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,s1,b from part_change_various_various_struct1 PREHOOK: type: QUERY @@ -383,25 +413,55 @@ POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).b SIMPLE [ POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).insert_num SIMPLE [(complex_struct2_d_txt)complex_struct2_d_txt.FieldSchema(name:insert_num, type:int, comment:null), ] POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).s2 SIMPLE [(complex_struct2_d_txt)complex_struct2_d_txt.FieldSchema(name:s2, type:struct, comment:null), ] complex_struct2_d_txt.insert_num complex_struct2_d_txt.b complex_struct2_d_txt.s2 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,b,s2 from part_add_various_various_struct2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,b,s2 from part_add_various_various_struct2 POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
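Unlike the primitive-type cases, the struct plans in this file do not vectorize: the Map Vectorization block reports vectorized: false with a notVectorizedReason naming the unsupported struct column, and the execution mode drops from "vectorized, llap" to plain "llap". A minimal way to reproduce that fallback, reusing the query from the plan above:

explain vectorization detail
select insert_num,part,s1,b from part_change_various_various_struct1;

The plan still shows enabled: true at the PLAN VECTORIZATION level; only the map vertex opts out of vectorized execution.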
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 llap - File Output Operator [FS_2] - Select Operator [SEL_1] (rows=8 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=8 width=117) - default@part_add_various_various_struct2,part_add_various_various_struct2,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","b","s2"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_add_various_various_struct2 + Statistics: Num rows: 8 Data size: 939 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s2] not supported + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,b,s2 from part_add_various_various_struct2 PREHOOK: type: QUERY @@ -563,25 +623,55 @@ POSTHOOK: Lineage: part_add_to_various_various_struct4 PARTITION(part=1).b SIMPL POSTHOOK: Lineage: part_add_to_various_various_struct4 PARTITION(part=1).insert_num SIMPLE [(complex_struct4_c_txt)complex_struct4_c_txt.FieldSchema(name:insert_num, type:int, comment:null), ] POSTHOOK: Lineage: part_add_to_various_various_struct4 PARTITION(part=1).s3 SIMPLE [(complex_struct4_c_txt)complex_struct4_c_txt.FieldSchema(name:s3, type:struct, comment:null), ] complex_struct4_c_txt.insert_num complex_struct4_c_txt.b complex_struct4_c_txt.s3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,b,s3 from part_add_to_various_various_struct4 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,b,s3 from part_add_to_various_various_struct4 POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_add_to_various_various_struct4 + Statistics: Num rows: 4 Data size: 353 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: insert_num (type: int), part (type: int), b (type: string), s3 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s3] not supported + vectorized: false -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 llap - File Output Operator [FS_2] - Select Operator [SEL_1] (rows=4 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=4 width=88) - default@part_add_to_various_various_struct4,part_add_to_various_various_struct4,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","b","s3"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,b,s3 from part_add_to_various_various_struct4 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_primitive.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_primitive.q.out index b8601df..7cd9820 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_primitive.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_primitive.q.out @@ -242,25 +242,73 @@ POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part= POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part=1).c9 SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:boolean1, type:boolean, comment:null), ] POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 int1 int1 int1 int1 int1 int1 int1 int1 int1 int1 int1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 _c54 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from 
diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_primitive.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_primitive.q.out
index b8601df..7cd9820 100644
--- ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_primitive.q.out
+++ ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_primitive.q.out
@@ -242,25 +242,73 @@ POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part=
 POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part=1).c9 SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:boolean1, type:boolean, comment:null), ]
 POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, type:int, comment:null), ]
 insert_num boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 int1 int1 int1 int1 int1 int1 int1 int1 int1 int1 int1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 _c54
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=10 width=4)
-          Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47","_col48","_col49","_col50","_col51","_col52","_col53","_col54","_col55"]
-          TableScan [TS_0] (rows=10 width=507)
-            default@part_change_various_various_boolean_to_bigint,part_change_various_various_boolean_to_bigint,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","c21","c22","c23","c24","c25","c26","c27","c28","c29","c30","c31","c32","c33","c34","c35","c36","c37","c38","c39","c40","c41","c42","c43","c44","c45","c46","c47","c48","c49","c50","c51","c52","c53","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part_change_various_various_boolean_to_bigint
+                  Statistics: Num rows: 10 Data size: 5074 Basic stats: COMPLETE Column stats: PARTIAL
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55]
+                  Select Operator
+                    expressions: insert_num (type: int), part (type: int), c1 (type: boolean), c2 (type: boolean), c3 (type: boolean), c4 (type: boolean), c5 (type: boolean), c6 (type: boolean), c7 (type: boolean), c8 (type: boolean), c9 (type: boolean), c10 (type: tinyint), c11 (type: tinyint), c12 (type: tinyint), c13 (type: tinyint), c14 (type: tinyint), c15 (type: tinyint), c16 (type: tinyint), c17 (type: tinyint), c18 (type: tinyint), c19 (type: tinyint), c20 (type: tinyint), c21 (type: smallint), c22 (type: smallint), c23 (type: smallint), c24 (type: smallint), c25 (type: smallint), c26 (type: smallint), c27 (type: smallint), c28 (type: smallint), c29 (type: smallint), c30 (type: smallint), c31 (type: smallint), c32 (type: int), c33 (type: int), c34 (type: int), c35 (type: int), c36 (type: int), c37 (type: int), c38 (type: int), c39 (type: int), c40 (type: int), c41 (type: int), c42 (type: int), c43 (type: bigint), c44 (type: bigint), c45 (type: bigint), c46 (type: bigint), c47 (type: bigint), c48 (type: bigint), c49 (type: bigint), c50 (type: bigint), c51 (type: bigint), c52 (type: bigint), c53 (type: bigint), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 55, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54]
+                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 55
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54]
+                    dataColumns: insert_num:int, c1:boolean, c2:boolean, c3:boolean, c4:boolean, c5:boolean, c6:boolean, c7:boolean, c8:boolean, c9:boolean, c10:tinyint, c11:tinyint, c12:tinyint, c13:tinyint, c14:tinyint, c15:tinyint, c16:tinyint, c17:tinyint, c18:tinyint, c19:tinyint, c20:tinyint, c21:smallint, c22:smallint, c23:smallint, c24:smallint, c25:smallint, c26:smallint, c27:smallint, c28:smallint, c29:smallint, c30:smallint, c31:smallint, c32:int, c33:int, c34:int, c35:int, c36:int, c37:int, c38:int, c39:int, c40:int, c41:int, c42:int, c43:bigint, c44:bigint, c45:bigint, c46:bigint, c47:bigint, c48:bigint, c49:bigint, c50:bigint, c51:bigint, c52:bigint, c53:bigint, b:string
+                    partitionColumnCount: 1
+                    partitionColumns: part:int
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint
 PREHOOK: type: QUERY
@@ -443,25 +491,73 @@ POSTHOOK: Lineage: part_change_various_various_decimal_to_double PARTITION(part=
 POSTHOOK: Lineage: part_change_various_various_decimal_to_double PARTITION(part=1).c9 SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:decimal1, type:decimal(38,18), comment:null), ]
 POSTHOOK: Lineage: part_change_various_various_decimal_to_double PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:insert_num, type:int, comment:null), ]
 insert_num decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 float1 float1 float1 float1 float1 float1 float1 float1 float1 float1 float1 double1 double1 double1 double1 double1 double1 double1 double1 double1 double1 double1 _c34
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=6 width=4)
-          Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35"]
-          TableScan [TS_0] (rows=6 width=453)
-            default@part_change_various_various_decimal_to_double,part_change_various_various_decimal_to_double,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","c21","c22","c23","c24","c25","c26","c27","c28","c29","c30","c31","c32","c33","b"]
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part_change_various_various_decimal_to_double
+                  Statistics: Num rows: 6 Data size: 2723 Basic stats: COMPLETE Column stats: PARTIAL
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]
+                  Select Operator
+                    expressions: insert_num (type: int), part (type: int), c1 (type: decimal(38,18)), c2 (type: decimal(38,18)), c3 (type: decimal(38,18)), c4 (type: decimal(38,18)), c5 (type: decimal(38,18)), c6 (type: decimal(38,18)), c7 (type: decimal(38,18)), c8 (type: decimal(38,18)), c9 (type: decimal(38,18)), c10 (type: decimal(38,18)), c11 (type: decimal(38,18)), c12 (type: float), c13 (type: float), c14 (type: float), c15 (type: float), c16 (type: float), c17 (type: float), c18 (type: float), c19 (type: float), c20 (type: float), c21 (type: float), c22 (type: float), c23 (type: double), c24 (type: double), c25 (type: double), c26 (type: double), c27 (type: double), c28 (type: double), c29 (type: double), c30 (type: double), c31 (type: double), c32 (type: double), c33 (type: double), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 35, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34]
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 35
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34]
+                    dataColumns: insert_num:int, c1:decimal(38,18), c2:decimal(38,18), c3:decimal(38,18), c4:decimal(38,18), c5:decimal(38,18), c6:decimal(38,18), c7:decimal(38,18), c8:decimal(38,18), c9:decimal(38,18), c10:decimal(38,18), c11:decimal(38,18), c12:float, c13:float, c14:float, c15:float, c16:float, c17:float, c18:float, c19:float, c20:float, c21:float, c22:float, c23:double, c24:double, c25:double, c26:double, c27:double, c28:double, c29:double, c30:double, c31:double, c32:double, c33:double, b:string
+                    partitionColumnCount: 1
+                    partitionColumns: part:int
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double
 PREHOOK: type: QUERY
@@ -566,25 +662,73 @@ POSTHOOK: Lineage: part_change_various_various_timestamp PARTITION(part=1).c8 SI
 POSTHOOK: Lineage: part_change_various_various_timestamp PARTITION(part=1).c9 SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:timestamp1, type:timestamp, comment:null), ]
 POSTHOOK: Lineage: part_change_various_various_timestamp PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:insert_num, type:int, comment:null), ]
 insert_num timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 _c13
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=6 width=4)
-          Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"]
-          TableScan [TS_0] (rows=6 width=150)
-            default@part_change_various_various_timestamp,part_change_various_various_timestamp,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part_change_various_various_timestamp
+                  Statistics: Num rows: 6 Data size: 903 Basic stats: COMPLETE Column stats: PARTIAL
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
+                  Select Operator
+                    expressions: insert_num (type: int), part (type: int), c1 (type: timestamp), c2 (type: timestamp), c3 (type: timestamp), c4 (type: timestamp), c5 (type: timestamp), c6 (type: timestamp), c7 (type: timestamp), c8 (type: timestamp), c9 (type: timestamp), c10 (type: timestamp), c11 (type: timestamp), c12 (type: timestamp), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 14
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
+                    dataColumns: insert_num:int, c1:timestamp, c2:timestamp, c3:timestamp, c4:timestamp, c5:timestamp, c6:timestamp, c7:timestamp, c8:timestamp, c9:timestamp, c10:timestamp, c11:timestamp, c12:timestamp, b:string
+                    partitionColumnCount: 1
+                    partitionColumns: part:int
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp
 PREHOOK: type: QUERY
@@ -673,25 +817,73 @@ POSTHOOK: Lineage: part_change_various_various_date PARTITION(part=1).c3 SIMPLE
 POSTHOOK: Lineage: part_change_various_various_date PARTITION(part=1).c4 SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:date1, type:date, comment:null), ]
 POSTHOOK: Lineage: part_change_various_various_date PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:insert_num, type:int, comment:null), ]
 insert_num date1 date1 date1 date1 _c5
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part_change_various_various_date
+                  Statistics: Num rows: 6 Data size: 461 Basic stats: COMPLETE Column stats: PARTIAL
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6]
+                  Select Operator
+                    expressions: insert_num (type: int), part (type: int), c1 (type: date), c2 (type: date), c3 (type: date), c4 (type: date), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 6, 1, 2, 3, 4, 5]
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 6
+                    includeColumns: [0, 1, 2, 3, 4, 5]
+                    dataColumns: insert_num:int, c1:date, c2:date, c3:date, c4:date, b:string
+                    partitionColumnCount: 1
+                    partitionColumns: part:int
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=6 width=4)
-          Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
-          TableScan [TS_0] (rows=6 width=76)
-            default@part_change_various_various_date,part_change_various_various_date,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","b"]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date
 PREHOOK: type: QUERY
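The next hunk covers the "same type, different params" case: the base types stay fixed while char/varchar lengths and decimal precision/scale are widened. A sketch of that kind of alteration, assuming an illustrative table (column types match the plan below):

    -- widening parameters within the same type family is a metadata-only change
    create table t_params (c1 char(8), c5 decimal(10,2));
    alter table t_params change column c1 c1 char(32);
    alter table t_params change column c5 c5 decimal(25,15);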
@@ -861,25 +1053,73 @@ POSTHOOK: Lineage: part_change_same_type_different_params PARTITION(part=2).c5 S
 POSTHOOK: Lineage: part_change_same_type_different_params PARTITION(part=2).c6 SIMPLE [(same_type1_c_txt)same_type1_c_txt.FieldSchema(name:c6, type:decimal(25,15), comment:null), ]
 POSTHOOK: Lineage: part_change_same_type_different_params PARTITION(part=2).insert_num SIMPLE [(same_type1_c_txt)same_type1_c_txt.FieldSchema(name:insert_num, type:int, comment:null), ]
 same_type1_c_txt.insert_num same_type1_c_txt.c1 same_type1_c_txt.c2 same_type1_c_txt.c3 same_type1_c_txt.c4 same_type1_c_txt.c5 same_type1_c_txt.c6 same_type1_c_txt.b
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part_change_same_type_different_params
+                  Statistics: Num rows: 13 Data size: 1427 Basic stats: COMPLETE Column stats: PARTIAL
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                  Select Operator
+                    expressions: insert_num (type: int), part (type: int), c1 (type: char(8)), c2 (type: char(32)), c3 (type: varchar(15)), c4 (type: varchar(18)), c5 (type: decimal(10,2)), c6 (type: decimal(25,15)), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 8, 1, 2, 3, 4, 5, 6, 7]
+                    Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: PARTIAL
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 8
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7]
+                    dataColumns: insert_num:int, c1:char(8), c2:char(32), c3:varchar(15), c4:varchar(18), c5:decimal(10,2), c6:decimal(25,15), b:string
+                    partitionColumnCount: 1
+                    partitionColumns: part:int
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=13 width=4)
-          Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
-          TableScan [TS_0] (rows=13 width=109)
-            default@part_change_same_type_different_params,part_change_same_type_different_params,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","b"]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params
 PREHOOK: type: QUERY
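The following file exercises the same evolutions on unpartitioned tables (note partitionColumnCount: 0 in its rowBatchContext). A sketch of the first case it covers, "add a column, then select only the original ones" — names mirror the golden file, the DDL itself is illustrative:

    create table table_add_int_permute_select (insert_num int, a int, b string);
    alter table table_add_int_permute_select add columns (c int);
    -- the plan then reports dataColumnCount: 4 but includeColumns: [0, 1, 2],
    -- since the newly added column c is not referenced by the select
    explain vectorization detail
    select insert_num, a, b from table_add_int_permute_select;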
diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out
index 06d2372..5bfe159 100644
--- ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out
+++ ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out
@@ -55,25 +55,72 @@ POSTHOOK: Lineage: table_add_int_permute_select.b SIMPLE [(values__tmp__table__1
 POSTHOOK: Lineage: table_add_int_permute_select.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
 POSTHOOK: Lineage: table_add_int_permute_select.insert_num EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0 _col1 _col2 _col3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,a,b from table_add_int_permute_select
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,a,b from table_add_int_permute_select
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=6 width=20)
-          Output:["_col0","_col1","_col2"]
-          TableScan [TS_0] (rows=6 width=20)
-            default@table_add_int_permute_select,table_add_int_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: table_add_int_permute_select
+                  Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
+                  Select Operator
+                    expressions: insert_num (type: int), a (type: int), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2]
+                    Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    includeColumns: [0, 1, 2]
+                    dataColumns: insert_num:int, a:int, b:string, c:int
+                    partitionColumnCount: 0
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,a,b from table_add_int_permute_select
 PREHOOK: type: QUERY
@@ -168,25 +215,72 @@ POSTHOOK: Lineage: table_add_int_string_permute_select.c EXPRESSION [(values__tm
 POSTHOOK: Lineage: table_add_int_string_permute_select.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ]
 POSTHOOK: Lineage: table_add_int_string_permute_select.insert_num EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0 _col1 _col2 _col3 _col4
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,a,b from table_add_int_string_permute_select
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,a,b from table_add_int_string_permute_select
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=6 width=21)
-          Output:["_col0","_col1","_col2"]
-          TableScan [TS_0] (rows=6 width=21)
-            default@table_add_int_string_permute_select,table_add_int_string_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"]
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: table_add_int_string_permute_select
+                  Statistics: Num rows: 6 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4]
+                  Select Operator
+                    expressions: insert_num (type: int), a (type: int), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2]
+                    Statistics: Num rows: 6 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    includeColumns: [0, 1, 2]
+                    dataColumns: insert_num:int, a:int, b:string, c:int, d:string
+                    partitionColumnCount: 0
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,a,b from table_add_int_string_permute_select
 PREHOOK: type: QUERY
@@ -343,25 +437,72 @@ POSTHOOK: Lineage: table_change_string_group_double.c2 EXPRESSION [(values__tmp_
 POSTHOOK: Lineage: table_change_string_group_double.c3 EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
 POSTHOOK: Lineage: table_change_string_group_double.insert_num EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0 _col1 _col2 _col3 _col4
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,c1,c2,c3,b from table_change_string_group_double
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,c1,c2,c3,b from table_change_string_group_double
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=6 width=80)
-          Output:["_col0","_col1","_col2","_col3","_col4"]
-          TableScan [TS_0] (rows=6 width=80)
-            default@table_change_string_group_double,table_change_string_group_double,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: table_change_string_group_double
+                  Statistics: Num rows: 6 Data size: 482 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4]
+                  Select Operator
+                    expressions: insert_num (type: int), c1 (type: double), c2 (type: double), c3 (type: double), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3, 4]
+                    Statistics: Num rows: 6 Data size: 482 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 482 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    includeColumns: [0, 1, 2, 3, 4]
+                    dataColumns: insert_num:int, c1:double, c2:double, c3:double, b:string
+                    partitionColumnCount: 0
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,c1,c2,c3,b from table_change_string_group_double
 PREHOOK: type: QUERY
@@ -587,25 +728,72 @@ POSTHOOK: Lineage: table_change_numeric_group_string_group_multi_ints_string_gro
 POSTHOOK: Lineage: table_change_numeric_group_string_group_multi_ints_string_group.c9 EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col10, type:string, comment:), ]
 POSTHOOK: Lineage: table_change_numeric_group_string_group_multi_ints_string_group.insert_num EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 _col17 _col18 _col19 _col20 _col21
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: table_change_numeric_group_string_group_multi_ints_string_group
+                  Statistics: Num rows: 6 Data size: 1070 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+                  Select Operator
+                    expressions: insert_num (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: string), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(50)), c8 (type: char(50)), c9 (type: char(5)), c10 (type: char(5)), c11 (type: char(5)), c12 (type: char(5)), c13 (type: varchar(50)), c14 (type: varchar(50)), c15 (type: varchar(50)), c16 (type: varchar(50)), c17 (type: varchar(5)), c18 (type: varchar(5)), c19 (type: varchar(5)), c20 (type: varchar(5)), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+                    Statistics: Num rows: 6 Data size: 1070 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 1070 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 22
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+                    dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:string, c5:char(50), c6:char(50), c7:char(50), c8:char(50), c9:char(5), c10:char(5), c11:char(5), c12:char(5), c13:varchar(50), c14:varchar(50), c15:varchar(50), c16:varchar(50), c17:varchar(5), c18:varchar(5), c19:varchar(5), c20:varchar(5), b:string
+                    partitionColumnCount: 0
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=6 width=178)
-          Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21"]
-          TableScan [TS_0] (rows=6 width=178)
-            default@table_change_numeric_group_string_group_multi_ints_string_group,table_change_numeric_group_string_group_multi_ints_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","b"]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group
 PREHOOK: type: QUERY
@@ -742,25 +930,72 @@ POSTHOOK: Lineage: table_change_numeric_group_string_group_floating_string_group
 POSTHOOK: Lineage: table_change_numeric_group_string_group_floating_string_group.c9 EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col10, type:string, comment:), ]
 POSTHOOK: Lineage: table_change_numeric_group_string_group_floating_string_group.insert_num EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: table_change_numeric_group_string_group_floating_string_group
+                  Statistics: Num rows: 6 Data size: 1497 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+                  Select Operator
+                    expressions: insert_num (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: char(50)), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(7)), c8 (type: char(7)), c9 (type: char(7)), c10 (type: varchar(50)), c11 (type: varchar(50)), c12 (type: varchar(50)), c13 (type: varchar(7)), c14 (type: varchar(7)), c15 (type: varchar(7)), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+                    Statistics: Num rows: 6 Data size: 1497 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 1497 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 17
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+                    dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:char(50), c5:char(50), c6:char(50), c7:char(7), c8:char(7), c9:char(7), c10:varchar(50), c11:varchar(50), c12:varchar(50), c13:varchar(7), c14:varchar(7), c15:varchar(7), b:string
+                    partitionColumnCount: 0
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=6 width=249)
-          Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"]
-          TableScan [TS_0] (rows=6 width=249)
-            default@table_change_numeric_group_string_group_floating_string_group,table_change_numeric_group_string_group_floating_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","b"]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group
 PREHOOK: type: QUERY
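The last file in this section is the "vecrow" variant: the same schema-evolution queries, but run with the row-mode deserializer, so enabledConditionsMet in its plans reports hive.vectorized.use.row.serde.deserialize instead of the vector serde. A sketch of the toggle, assuming these settings are flipped in the corresponding .q setup:

    set hive.vectorized.execution.enabled=true;
    set hive.vectorized.use.vector.serde.deserialize=false;
    set hive.vectorized.use.row.serde.deserialize=true;
    explain vectorization detail
    select insert_num, part, a, b from part_add_int_permute_select;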
diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part.q.out
index 1fe9a13..d687506 100644
--- ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part.q.out
+++ ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part.q.out
@@ -53,25 +53,73 @@ POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).b SIMPLE [(valu
 POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
 POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0 _col1 _col2 _col3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,a,b from part_add_int_permute_select
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,a,b from part_add_int_permute_select
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=2 width=4)
-          Output:["_col0","_col1","_col2","_col3"]
-          TableScan [TS_0] (rows=2 width=16)
-            default@part_add_int_permute_select,part_add_int_permute_select,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","a","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part_add_int_permute_select
+                  Statistics: Num rows: 2 Data size: 33 Basic stats: COMPLETE Column stats: PARTIAL
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4]
+                  Select Operator
+                    expressions: insert_num (type: int), part (type: int), a (type: int), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 4, 1, 2]
+                    Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    includeColumns: [0, 1, 2]
+                    dataColumns: insert_num:int, a:int, b:string, c:int
+                    partitionColumnCount: 1
+                    partitionColumns: part:int
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,part,a,b from part_add_int_permute_select
 PREHOOK: type: QUERY
@@ -158,25 +206,73 @@ POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).c EXPRES
 POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).d SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col5, type:string, comment:), ]
 POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0 _col1 _col2 _col3 _col4
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,a,b from part_add_int_string_permute_select
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,a,b from part_add_int_string_permute_select
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=2 width=4)
-          Output:["_col0","_col1","_col2","_col3"]
-          TableScan [TS_0] (rows=2 width=19)
-            default@part_add_int_string_permute_select,part_add_int_string_permute_select,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","a","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part_add_int_string_permute_select
+                  Statistics: Num rows: 2 Data size: 38 Basic stats: COMPLETE Column stats: PARTIAL
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                  Select Operator
+                    expressions: insert_num (type: int), part (type: int), a (type: int), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 5, 1, 2]
+                    Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    includeColumns: [0, 1, 2]
+                    dataColumns: insert_num:int, a:int, b:string, c:int, d:string
+                    partitionColumnCount: 1
+                    partitionColumns: part:int
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,part,a,b from part_add_int_string_permute_select
 PREHOOK: type: QUERY
@@ -321,25 +417,73 @@ POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).c2 SIMPLE [
 POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).c3 SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:double1, type:double, comment:null), ]
 POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, type:int, comment:null), ]
 insert_num double1 double1 double1 _c4
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,b from part_change_string_group_double
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,b from part_change_string_group_double
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=5 width=4)
-          Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
-          TableScan [TS_0] (rows=5 width=94)
-            default@part_change_string_group_double,part_change_string_group_double,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","b"]
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part_change_string_group_double
+                  Statistics: Num rows: 5 Data size: 471 Basic stats: COMPLETE Column stats: PARTIAL
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                  Select Operator
+                    expressions: insert_num (type: int), part (type: int), c1 (type: double), c2 (type: double), c3 (type: double), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 5, 1, 2, 3, 4]
+                    Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    includeColumns: [0, 1, 2, 3, 4]
+                    dataColumns: insert_num:int, c1:double, c2:double, c3:double, b:string
+                    partitionColumnCount: 1
+                    partitionColumns: part:int
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,part,c1,c2,c3,b from part_change_string_group_double
 PREHOOK: type: QUERY
@@ -421,25 +565,73 @@ POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION(
 POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col10, type:string, comment:), ]
 POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=6 width=4)
-          Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"]
-          TableScan [TS_0] (rows=6 width=172)
-            default@part_change_date_group_string_group_date_timestamp,part_change_date_group_string_group_date_timestamp,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part_change_date_group_string_group_date_timestamp
+                  Statistics: Num rows: 6 Data size: 1032 Basic stats: COMPLETE Column stats: PARTIAL
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
+                  Select Operator
+                    expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: char(50)), c3 (type: char(15)), c4 (type: varchar(50)), c5 (type: varchar(15)), c6 (type: string), c7 (type: char(50)), c8 (type: char(15)), c9 (type: varchar(50)), c10 (type: varchar(15)), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                    dataColumns: insert_num:int, c1:string, c2:char(50), c3:char(15), c4:varchar(50), c5:varchar(15), c6:string, c7:char(50), c8:char(15), c9:varchar(50), c10:varchar(15), b:string
+                    partitionColumnCount: 1
+                    partitionColumns: part:int
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp
 PREHOOK: type: QUERY
@@ -590,25 +782,73 @@ POSTHOOK: Lineage: part_change_numeric_group_string_group_multi_ints_string_grou
 POSTHOOK: Lineage: part_change_numeric_group_string_group_multi_ints_string_group PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col10, type:string, comment:), ]
type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 _col17 _col18 _col19 _col20 _col21 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22"] - TableScan [TS_0] (rows=6 width=182) - default@part_change_numeric_group_string_group_multi_ints_string_group,part_change_numeric_group_string_group_multi_ints_string_group,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","b"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_numeric_group_string_group_multi_ints_string_group + Statistics: Num rows: 6 Data size: 1094 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: string), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(50)), c8 (type: char(50)), c9 (type: char(5)), c10 (type: char(5)), c11 (type: char(5)), c12 (type: char(5)), c13 (type: varchar(50)), c14 (type: varchar(50)), c15 (type: varchar(50)), c16 (type: varchar(50)), c17 (type: varchar(5)), c18 (type: varchar(5)), c19 (type: varchar(5)), c20 (type: varchar(5)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 22, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 22 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:string, c5:char(50), c6:char(50), c7:char(50), c8:char(50), c9:char(5), c10:char(5), c11:char(5), c12:char(5), c13:varchar(50), c14:varchar(50), c15:varchar(50), c16:varchar(50), c17:varchar(5), c18:varchar(5), c19:varchar(5), c20:varchar(5), b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group PREHOOK: type: QUERY @@ -749,25 +989,73 @@ POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_numeric_group_string_group_floating_string_group + Statistics: Num rows: 6 Data size: 1521 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: char(50)), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(7)), c8 (type: char(7)), c9 (type: char(7)), c10 (type: varchar(50)), c11 (type: varchar(50)), c12 (type: varchar(50)), c13 (type: varchar(7)), c14 (type: varchar(7)), c15 (type: varchar(7)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 17, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 17 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:char(50), c5:char(50), c6:char(50), c7:char(7), c8:char(7), c9:char(7), c10:varchar(50), c11:varchar(50), c12:varchar(50), c13:varchar(7), c14:varchar(7), c15:varchar(7), b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - TableScan [TS_0] (rows=6 width=253) - default@part_change_numeric_group_string_group_floating_string_group,part_change_numeric_group_string_group_floating_string_group,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY @@ -894,25 +1182,73 @@ POSTHOOK: Lineage: part_change_string_group_string_group_string PARTITION(part=1 POSTHOOK: Lineage: 
part_change_string_group_string_group_string PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_string_group_string_group_string PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_string_group_string_group_string + Statistics: Num rows: 6 Data size: 1205 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: char(50)), c2 (type: char(9)), c3 (type: varchar(50)), c4 (type: char(9)), c5 (type: varchar(50)), c6 (type: varchar(9)), c7 (type: string), c8 (type: char(50)), c9 (type: char(9)), c10 (type: string), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + dataColumns: insert_num:int, c1:char(50), c2:char(9), c3:varchar(50), c4:char(9), c5:varchar(50), c6:varchar(9), c7:string, c8:char(50), c9:char(9), c10:string, b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - TableScan [TS_0] (rows=6 width=200) - 
default@part_change_string_group_string_group_string,part_change_string_group_string_group_string,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string PREHOOK: type: QUERY @@ -1067,25 +1403,73 @@ POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint P POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 _col17 _col18 _col19 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] - TableScan [TS_0] (rows=6 width=143) - default@part_change_lower_to_higher_numeric_group_tinyint_to_bigint,part_change_lower_to_higher_numeric_group_tinyint_to_bigint,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_lower_to_higher_numeric_group_tinyint_to_bigint + Statistics: Num rows: 6 Data size: 860 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: smallint), c2 (type: int), c3 (type: bigint), c4 (type: decimal(38,18)), c5 (type: float), c6 (type: double), c7 (type: int), c8 (type: bigint), c9 (type: decimal(38,18)), c10 (type: float), c11 (type: double), c12 (type: bigint), c13 (type: decimal(38,18)), c14 (type: float), c15 (type: double), c16 (type: decimal(38,18)), c17 (type: float), c18 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select 
Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 20, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 20 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + dataColumns: insert_num:int, c1:smallint, c2:int, c3:bigint, c4:decimal(38,18), c5:float, c6:double, c7:int, c8:bigint, c9:decimal(38,18), c10:float, c11:double, c12:bigint, c13:decimal(38,18), c14:float, c15:double, c16:decimal(38,18), c17:float, c18:double, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint PREHOOK: type: QUERY @@ -1182,25 +1566,73 @@ POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PA POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PARTITION(part=1).c3 EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col4, type:string, comment:), ] POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_lower_to_higher_numeric_group_decimal_to_float + Statistics: Num rows: 6 Data size: 428 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: float), c2 (type: double), c3 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1, 2, 3, 4] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3, 4] + dataColumns: insert_num:int, c1:float, c2:double, c3:double, b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - TableScan [TS_0] (rows=6 width=71) - default@part_change_lower_to_higher_numeric_group_decimal_to_float,part_change_lower_to_higher_numeric_group_decimal_to_float,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out index be42c05..b35bccc 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out @@ -131,25 +131,55 @@ POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).b SIMPL POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).insert_num SIMPLE [(complex_struct1_c_txt)complex_struct1_c_txt.FieldSchema(name:insert_num, type:int, comment:null), ] POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).s1 SIMPLE [(complex_struct1_c_txt)complex_struct1_c_txt.FieldSchema(name:s1, type:struct, comment:null), ] complex_struct1_c_txt.insert_num complex_struct1_c_txt.s1 complex_struct1_c_txt.b -PREHOOK: query: explain +PREHOOK: query: explain vectorization 
detail select insert_num,part,s1,b from part_change_various_various_struct1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,s1,b from part_change_various_various_struct1 POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 llap - File Output Operator [FS_2] - Select Operator [SEL_1] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=6 width=155) - default@part_change_various_various_struct1,part_change_various_various_struct1,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","s1","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_struct1 + Statistics: Num rows: 6 Data size: 931 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: insert_num (type: int), part (type: int), s1 (type: struct), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s1] not supported + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,s1,b from part_change_various_various_struct1 PREHOOK: type: QUERY @@ -383,25 +413,55 @@ POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).b SIMPLE [ POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).insert_num SIMPLE [(complex_struct2_d_txt)complex_struct2_d_txt.FieldSchema(name:insert_num, type:int, comment:null), ] POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).s2 SIMPLE [(complex_struct2_d_txt)complex_struct2_d_txt.FieldSchema(name:s2, type:struct, comment:null), ] complex_struct2_d_txt.insert_num complex_struct2_d_txt.b complex_struct2_d_txt.s2 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,b,s2 from part_add_various_various_struct2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,b,s2 from part_add_various_various_struct2 POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 llap - File Output Operator [FS_2] - Select Operator [SEL_1] (rows=8 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=8 width=117) - default@part_add_various_various_struct2,part_add_various_various_struct2,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","b","s2"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_add_various_various_struct2 + Statistics: Num rows: 8 Data size: 939 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s2] not supported + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,b,s2 from part_add_various_various_struct2 PREHOOK: type: QUERY @@ -563,25 +623,55 @@ POSTHOOK: Lineage: part_add_to_various_various_struct4 PARTITION(part=1).b SIMPL POSTHOOK: Lineage: part_add_to_various_various_struct4 PARTITION(part=1).insert_num SIMPLE [(complex_struct4_c_txt)complex_struct4_c_txt.FieldSchema(name:insert_num, type:int, comment:null), ] POSTHOOK: Lineage: part_add_to_various_various_struct4 PARTITION(part=1).s3 SIMPLE [(complex_struct4_c_txt)complex_struct4_c_txt.FieldSchema(name:s3, type:struct, comment:null), ] complex_struct4_c_txt.insert_num complex_struct4_c_txt.b complex_struct4_c_txt.s3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,b,s3 from part_add_to_various_various_struct4 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,b,s3 from part_add_to_various_various_struct4 POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_add_to_various_various_struct4 + Statistics: Num rows: 4 Data size: 353 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: insert_num (type: int), part (type: int), b (type: string), s3 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s3] not supported + vectorized: false -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 llap - File Output Operator [FS_2] - Select Operator [SEL_1] (rows=4 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=4 width=88) - default@part_add_to_various_various_struct4,part_add_to_various_various_struct4,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","b","s3"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,b,s3 from part_add_to_various_various_struct4 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out index bb3dd31..59d6797 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out @@ -242,25 +242,73 @@ POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part= POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part=1).c9 SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:boolean1, type:boolean, comment:null), ] POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 int1 int1 int1 int1 int1 int1 int1 int1 int1 int1 int1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 _c54 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from 
part_change_various_various_boolean_to_bigint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=10 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47","_col48","_col49","_col50","_col51","_col52","_col53","_col54","_col55"] - TableScan [TS_0] (rows=10 width=512) - default@part_change_various_various_boolean_to_bigint,part_change_various_various_boolean_to_bigint,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","c21","c22","c23","c24","c25","c26","c27","c28","c29","c30","c31","c32","c33","c34","c35","c36","c37","c38","c39","c40","c41","c42","c43","c44","c45","c46","c47","c48","c49","c50","c51","c52","c53","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_boolean_to_bigint + Statistics: Num rows: 10 Data size: 5126 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: boolean), c2 (type: boolean), c3 (type: boolean), c4 (type: boolean), c5 (type: boolean), c6 (type: boolean), c7 (type: boolean), c8 (type: boolean), c9 (type: boolean), c10 (type: tinyint), c11 (type: tinyint), c12 (type: tinyint), c13 (type: tinyint), c14 (type: tinyint), c15 (type: tinyint), c16 (type: tinyint), c17 (type: tinyint), c18 (type: tinyint), c19 (type: tinyint), c20 (type: tinyint), c21 (type: smallint), c22 (type: smallint), c23 (type: smallint), c24 (type: smallint), c25 (type: smallint), c26 (type: smallint), c27 (type: smallint), c28 (type: smallint), c29 (type: smallint), c30 (type: smallint), c31 (type: smallint), c32 (type: int), c33 (type: int), c34 (type: int), c35 (type: int), c36 (type: int), c37 (type: int), c38 (type: int), c39 (type: int), c40 (type: int), c41 (type: int), c42 (type: int), c43 (type: bigint), c44 (type: bigint), c45 (type: bigint), c46 (type: bigint), c47 (type: bigint), c48 (type: bigint), c49 (type: bigint), c50 (type: bigint), c51 (type: bigint), c52 (type: bigint), c53 (type: bigint), b (type: string) + outputColumnNames: _col0, _col1, 
_col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 55, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54] + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 55 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54] + dataColumns: insert_num:int, c1:boolean, c2:boolean, c3:boolean, c4:boolean, c5:boolean, c6:boolean, c7:boolean, c8:boolean, c9:boolean, c10:tinyint, c11:tinyint, c12:tinyint, c13:tinyint, c14:tinyint, c15:tinyint, c16:tinyint, c17:tinyint, c18:tinyint, c19:tinyint, c20:tinyint, c21:smallint, c22:smallint, c23:smallint, c24:smallint, c25:smallint, c26:smallint, c27:smallint, c28:smallint, c29:smallint, c30:smallint, c31:smallint, c32:int, c33:int, c34:int, c35:int, c36:int, c37:int, c38:int, c39:int, c40:int, c41:int, c42:int, c43:bigint, c44:bigint, c45:bigint, c46:bigint, c47:bigint, c48:bigint, c49:bigint, c50:bigint, c51:bigint, c52:bigint, c53:bigint, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint PREHOOK: type: QUERY @@ -443,25 +491,73 @@ POSTHOOK: Lineage: part_change_various_various_decimal_to_double PARTITION(part= POSTHOOK: Lineage: part_change_various_various_decimal_to_double PARTITION(part=1).c9 SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:decimal1, type:decimal(38,18), comment:null), ] POSTHOOK: Lineage: part_change_various_various_decimal_to_double PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 
decimal1 float1 float1 float1 float1 float1 float1 float1 float1 float1 float1 float1 double1 double1 double1 double1 double1 double1 double1 double1 double1 double1 double1 _c34 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35"] - TableScan [TS_0] (rows=6 width=455) - default@part_change_various_various_decimal_to_double,part_change_various_various_decimal_to_double,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","c21","c22","c23","c24","c25","c26","c27","c28","c29","c30","c31","c32","c33","b"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_decimal_to_double + Statistics: Num rows: 6 Data size: 2735 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: decimal(38,18)), c2 (type: decimal(38,18)), c3 (type: decimal(38,18)), c4 (type: decimal(38,18)), c5 (type: decimal(38,18)), c6 (type: decimal(38,18)), c7 (type: decimal(38,18)), c8 (type: decimal(38,18)), c9 (type: decimal(38,18)), c10 (type: decimal(38,18)), c11 (type: decimal(38,18)), c12 (type: float), c13 (type: float), c14 (type: float), c15 (type: float), c16 (type: float), c17 (type: float), c18 (type: float), c19 (type: float), c20 (type: float), c21 (type: float), c22 (type: float), c23 (type: double), c24 (type: double), c25 (type: double), c26 (type: double), c27 (type: double), c28 (type: double), c29 (type: double), c30 (type: double), c31 (type: double), c32 (type: double), c33 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 35, 1, 2, 3, 4, 5, 6, 7, 
8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 35 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34] + dataColumns: insert_num:int, c1:decimal(38,18), c2:decimal(38,18), c3:decimal(38,18), c4:decimal(38,18), c5:decimal(38,18), c6:decimal(38,18), c7:decimal(38,18), c8:decimal(38,18), c9:decimal(38,18), c10:decimal(38,18), c11:decimal(38,18), c12:float, c13:float, c14:float, c15:float, c16:float, c17:float, c18:float, c19:float, c20:float, c21:float, c22:float, c23:double, c24:double, c25:double, c26:double, c27:double, c28:double, c29:double, c30:double, c31:double, c32:double, c33:double, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double PREHOOK: type: QUERY @@ -566,25 +662,73 @@ POSTHOOK: Lineage: part_change_various_various_timestamp PARTITION(part=1).c8 SI POSTHOOK: Lineage: part_change_various_various_timestamp PARTITION(part=1).c9 SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:timestamp1, type:timestamp, comment:null), ] POSTHOOK: Lineage: part_change_various_various_timestamp PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 _c13 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - TableScan [TS_0] (rows=6 width=151) - default@part_change_various_various_timestamp,part_change_various_various_timestamp,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_timestamp + Statistics: Num rows: 6 Data size: 907 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: timestamp), c2 (type: timestamp), c3 (type: timestamp), c4 (type: timestamp), c5 (type: timestamp), c6 (type: timestamp), c7 (type: timestamp), c8 (type: timestamp), c9 (type: timestamp), c10 (type: timestamp), c11 (type: timestamp), c12 (type: timestamp), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 14 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + dataColumns: insert_num:int, c1:timestamp, c2:timestamp, c3:timestamp, c4:timestamp, c5:timestamp, c6:timestamp, c7:timestamp, c8:timestamp, c9:timestamp, c10:timestamp, c11:timestamp, c12:timestamp, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp PREHOOK: type: QUERY @@ -673,25 +817,73 @@ POSTHOOK: Lineage: part_change_various_various_date PARTITION(part=1).c3 SIMPLE POSTHOOK: Lineage: part_change_various_various_date PARTITION(part=1).c4 SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:date1, type:date, comment:null), ] POSTHOOK: Lineage: part_change_various_various_date PARTITION(part=1).insert_num SIMPLE 
[(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num date1 date1 date1 date1 _c5 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_date + Statistics: Num rows: 6 Data size: 461 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: date), c2 (type: date), c3 (type: date), c4 (type: date), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 6, 1, 2, 3, 4, 5] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 6 + includeColumns: [0, 1, 2, 3, 4, 5] + dataColumns: insert_num:int, c1:date, c2:date, c3:date, c4:date, b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - TableScan [TS_0] (rows=6 width=76) - default@part_change_various_various_date,part_change_various_various_date,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date PREHOOK: type: QUERY @@ -861,25 +1053,73 @@ POSTHOOK: Lineage: part_change_same_type_different_params PARTITION(part=2).c5 S POSTHOOK: Lineage: part_change_same_type_different_params PARTITION(part=2).c6 SIMPLE [(same_type1_c_txt)same_type1_c_txt.FieldSchema(name:c6, type:decimal(25,15), comment:null), ] POSTHOOK: Lineage: part_change_same_type_different_params PARTITION(part=2).insert_num SIMPLE [(same_type1_c_txt)same_type1_c_txt.FieldSchema(name:insert_num, type:int, comment:null), ] same_type1_c_txt.insert_num same_type1_c_txt.c1 same_type1_c_txt.c2 same_type1_c_txt.c3 
same_type1_c_txt.c4 same_type1_c_txt.c5 same_type1_c_txt.c6 same_type1_c_txt.b -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_same_type_different_params + Statistics: Num rows: 13 Data size: 1427 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: char(8)), c2 (type: char(32)), c3 (type: varchar(15)), c4 (type: varchar(18)), c5 (type: decimal(10,2)), c6 (type: decimal(25,15)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 8, 1, 2, 3, 4, 5, 6, 7] + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7] + dataColumns: insert_num:int, c1:char(8), c2:char(32), c3:varchar(15), c4:varchar(18), c5:decimal(10,2), c6:decimal(25,15), b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=13 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - TableScan [TS_0] (rows=13 width=109) - default@part_change_same_type_different_params,part_change_same_type_different_params,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_table.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_table.q.out index 06d2372..2fb3fe8 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_table.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_table.q.out @@ -55,25 +55,72 @@ 
POSTHOOK: Lineage: table_add_int_permute_select.b SIMPLE [(values__tmp__table__1
 POSTHOOK: Lineage: table_add_int_permute_select.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
 POSTHOOK: Lineage: table_add_int_permute_select.insert_num EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0	_col1	_col2	_col3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,a,b from table_add_int_permute_select
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,a,b from table_add_int_permute_select
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=6 width=20)
-          Output:["_col0","_col1","_col2"]
-          TableScan [TS_0] (rows=6 width=20)
-            default@table_add_int_permute_select,table_add_int_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: table_add_int_permute_select
+                  Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
+                  Select Operator
+                    expressions: insert_num (type: int), a (type: int), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2]
+                    Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    includeColumns: [0, 1, 2]
+                    dataColumns: insert_num:int, a:int, b:string, c:int
+                    partitionColumnCount: 0
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 
 PREHOOK: query: select insert_num,a,b from table_add_int_permute_select
 PREHOOK: type: QUERY
@@ -168,25 +215,72 @@ POSTHOOK: Lineage: table_add_int_string_permute_select.c EXPRESSION [(values__tm
 POSTHOOK: Lineage: table_add_int_string_permute_select.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ]
 POSTHOOK: Lineage: table_add_int_string_permute_select.insert_num EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0	_col1	_col2	_col3	_col4
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,a,b from table_add_int_string_permute_select
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,a,b from table_add_int_string_permute_select
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=6 width=21)
-          Output:["_col0","_col1","_col2"]
-          TableScan [TS_0] (rows=6 width=21)
-            default@table_add_int_string_permute_select,table_add_int_string_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"]
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: table_add_int_string_permute_select
+                  Statistics: Num rows: 6 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4]
+                  Select Operator
+                    expressions: insert_num (type: int), a (type: int), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2]
+                    Statistics: Num rows: 6 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    includeColumns: [0, 1, 2]
+                    dataColumns: insert_num:int, a:int, b:string, c:int, d:string
+                    partitionColumnCount: 0
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 
 PREHOOK: query: select insert_num,a,b from table_add_int_string_permute_select
 PREHOOK: type: QUERY
@@ -343,25 +437,72 @@ POSTHOOK: Lineage: table_change_string_group_double.c2 EXPRESSION [(values__tmp_
 POSTHOOK: Lineage: table_change_string_group_double.c3 EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
 POSTHOOK: Lineage: table_change_string_group_double.insert_num EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0	_col1	_col2	_col3	_col4
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,c1,c2,c3,b from table_change_string_group_double
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,c1,c2,c3,b from table_change_string_group_double
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=6 width=80)
-          Output:["_col0","_col1","_col2","_col3","_col4"]
-          TableScan [TS_0] (rows=6 width=80)
-            default@table_change_string_group_double,table_change_string_group_double,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: table_change_string_group_double
+                  Statistics: Num rows: 6 Data size: 482 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4]
+                  Select Operator
+                    expressions: insert_num (type: int), c1 (type: double), c2 (type: double), c3 (type: double), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3, 4]
+                    Statistics: Num rows: 6 Data size: 482 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 482 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    includeColumns: [0, 1, 2, 3, 4]
+                    dataColumns: insert_num:int, c1:double, c2:double, c3:double, b:string
+                    partitionColumnCount: 0
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 
 PREHOOK: query: select insert_num,c1,c2,c3,b from table_change_string_group_double
 PREHOOK: type: QUERY
@@ -587,25 +728,72 @@ POSTHOOK: Lineage: table_change_numeric_group_string_group_multi_ints_string_gro
 POSTHOOK: Lineage: table_change_numeric_group_string_group_multi_ints_string_group.c9 EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col10, type:string, comment:), ]
 POSTHOOK: Lineage: table_change_numeric_group_string_group_multi_ints_string_group.insert_num EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0	_col1	_col2	_col3	_col4	_col5	_col6	_col7	_col8	_col9	_col10	_col11	_col12	_col13	_col14	_col15	_col16	_col17	_col18	_col19	_col20	_col21
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: table_change_numeric_group_string_group_multi_ints_string_group
+                  Statistics: Num rows: 6 Data size: 1070 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+                  Select Operator
+                    expressions: insert_num (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: string), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(50)), c8 (type: char(50)), c9 (type: char(5)), c10 (type: char(5)), c11 (type: char(5)), c12 (type: char(5)), c13 (type: varchar(50)), c14 (type: varchar(50)), c15 (type: varchar(50)), c16 (type: varchar(50)), c17 (type: varchar(5)), c18 (type: varchar(5)), c19 (type: varchar(5)), c20 (type: varchar(5)), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+                    Statistics: Num rows: 6 Data size: 1070 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 1070 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 22
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+                    dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:string, c5:char(50), c6:char(50), c7:char(50), c8:char(50), c9:char(5), c10:char(5), c11:char(5), c12:char(5), c13:varchar(50), c14:varchar(50), c15:varchar(50), c16:varchar(50), c17:varchar(5), c18:varchar(5), c19:varchar(5), c20:varchar(5), b:string
+                    partitionColumnCount: 0
 
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=6 width=178)
-          Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21"]
-          TableScan [TS_0] (rows=6 width=178)
-            default@table_change_numeric_group_string_group_multi_ints_string_group,table_change_numeric_group_string_group_multi_ints_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","b"]
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 
 PREHOOK: query: select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group
 PREHOOK: type: QUERY
@@ -742,25 +930,72 @@ POSTHOOK: Lineage: table_change_numeric_group_string_group_floating_string_group
 POSTHOOK: Lineage: table_change_numeric_group_string_group_floating_string_group.c9 EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col10, type:string, comment:), ]
 POSTHOOK: Lineage: table_change_numeric_group_string_group_floating_string_group.insert_num EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0	_col1	_col2	_col3	_col4	_col5	_col6	_col7	_col8	_col9	_col10	_col11	_col12	_col13	_col14	_col15	_col16
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: table_change_numeric_group_string_group_floating_string_group
+                  Statistics: Num rows: 6 Data size: 1497 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+                  Select Operator
+                    expressions: insert_num (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: char(50)), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(7)), c8 (type: char(7)), c9 (type: char(7)), c10 (type: varchar(50)), c11 (type: varchar(50)), c12 (type: varchar(50)), c13 (type: varchar(7)), c14 (type: varchar(7)), c15 (type: varchar(7)), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+                    Statistics: Num rows: 6 Data size: 1497 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 1497 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 17
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+                    dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:char(50), c5:char(50), c6:char(50), c7:char(7), c8:char(7), c9:char(7), c10:varchar(50), c11:varchar(50), c12:varchar(50), c13:varchar(7), c14:varchar(7), c15:varchar(7), b:string
+                    partitionColumnCount: 0
 
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=6 width=249)
-          Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"]
-          TableScan [TS_0] (rows=6 width=249)
-            default@table_change_numeric_group_string_group_floating_string_group,table_change_numeric_group_string_group_floating_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","b"]
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 
 PREHOOK: query: select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out
index 8482ed9..e39ab5a 100644
--- ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out
+++ ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out
@@ -94,20 +94,24 @@ POSTHOOK: type: QUERY
 POSTHOOK: Output: default@count_case_groupby
 POSTHOOK: Lineage: count_case_groupby.bool EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
 POSTHOOK: Lineage: count_case_groupby.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select c2 regexp 'val',
        c4 regexp 'val',
        (c2 regexp 'val') = (c4 regexp 'val')
 from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select c2 regexp 'val',
        c4 regexp 'val',
        (c2 regexp 'val') = (c4 regexp 'val')
 from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -138,6 +142,12 @@ STAGE PLANS:
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFRegExp(Column[c2], Const string val) because hive.vectorized.adaptor.usage.mode=none
+                vectorized: false
 
   Stage: Stage-0
     Fetch Operator
@@ -162,20 +172,24 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
#### A masked pattern was here ####
 true	true	true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select regexp_extract(c2, 'val_([0-9]+)', 1),
        regexp_extract(c4, 'val_([0-9]+)', 1),
        regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
 from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select regexp_extract(c2, 'val_([0-9]+)', 1),
        regexp_extract(c4, 'val_([0-9]+)', 1),
        regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
 from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -206,6 +220,12 @@ STAGE PLANS:
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFBridge ==> regexp_extract (Column[c2], Const string val_([0-9]+), Const int 1) because hive.vectorized.adaptor.usage.mode=none
+                vectorized: false
 
   Stage: Stage-0
     Fetch Operator
@@ -230,20 +250,24 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
#### A masked pattern was here ####
 238	238	true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select regexp_replace(c2, 'val', 'replaced'),
        regexp_replace(c4, 'val', 'replaced'),
        regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
 from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select regexp_replace(c2, 'val', 'replaced'),
        regexp_replace(c4, 'val', 'replaced'),
        regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
 from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -274,6 +298,12 @@ STAGE PLANS:
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFBridge ==> regexp_replace (Column[c2], Const string val, Const string replaced) because hive.vectorized.adaptor.usage.mode=none
+                vectorized: false
 
   Stage: Stage-0
     Fetch Operator
@@ -298,20 +328,24 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
#### A masked pattern was here ####
 replaced_238	replaced_238	true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select c2 regexp 'val',
        c4 regexp 'val',
        (c2 regexp 'val') = (c4 regexp 'val')
 from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select c2 regexp 'val',
        c4 regexp 'val',
        (c2 regexp 'val') = (c4 regexp 'val')
 from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -342,6 +376,12 @@ STAGE PLANS:
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFRegExp(Column[c2], Const string val) because hive.vectorized.adaptor.usage.mode=chosen and the UDF wasn't one of the chosen ones
+                vectorized: false
 
   Stage: Stage-0
     Fetch Operator
@@ -366,20 +406,24 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
#### A masked pattern was here ####
 true	true	true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select regexp_extract(c2, 'val_([0-9]+)', 1),
        regexp_extract(c4, 'val_([0-9]+)', 1),
        regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
 from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select regexp_extract(c2, 'val_([0-9]+)', 1),
        regexp_extract(c4, 'val_([0-9]+)', 1),
        regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
 from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -394,15 +438,29 @@ STAGE PLANS:
                 TableScan
                   alias: varchar_udf_1
                   Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                   Select Operator
                     expressions: regexp_extract(c2, 'val_([0-9]+)', 1) (type: string), regexp_extract(c4, 'val_([0-9]+)', 1) (type: string), (regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)) (type: boolean)
                     outputColumnNames: _col0, _col1, _col2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [4, 5, 8]
+                        selectExpressions: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 4:string, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 5:string, StringGroupColEqualStringGroupColumn(col 6, col 7)(children: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 6:string, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 7:string) -> 8:boolean
                     Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                     Limit
                       Number of rows: 1
+                      Limit Vectorization:
+                          className: VectorLimitOperator
+                          native: true
                       Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
                         Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -410,6 +468,14 @@ STAGE PLANS:
                             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -434,20 +500,24 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
#### A masked pattern was here ####
 238	238	true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select regexp_replace(c2, 'val', 'replaced'),
        regexp_replace(c4, 'val', 'replaced'),
        regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
 from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select regexp_replace(c2, 'val', 'replaced'),
        regexp_replace(c4, 'val', 'replaced'),
        regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
 from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -462,15 +532,29 @@ STAGE PLANS:
                 TableScan
                   alias: varchar_udf_1
                   Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                   Select Operator
                     expressions: regexp_replace(c2, 'val', 'replaced') (type: string), regexp_replace(c4, 'val', 'replaced') (type: string), (regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')) (type: boolean)
                     outputColumnNames: _col0, _col1, _col2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [4, 5, 8]
+                        selectExpressions: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 4:string, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 5:string, StringGroupColEqualStringGroupColumn(col 6, col 7)(children: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 6:string, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 7:string) -> 8:boolean
                     Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                     Limit
                       Number of rows: 1
+                      Limit Vectorization:
+                          className: VectorLimitOperator
+                          native: true
                       Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
                         Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -478,6 +562,14 @@ STAGE PLANS:
                             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -502,10 +594,14 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
#### A masked pattern was here ####
 replaced_238	replaced_238	true
-PREHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -533,6 +629,12 @@ STAGE PLANS:
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFPower(Column[key], Const int 2) because hive.vectorized.adaptor.usage.mode=none
+                vectorized: false
 
   Stage: Stage-0
     Fetch Operator
@@ -586,20 +688,24 @@ POSTHOOK: Input: default@decimal_udf
 9.8596
 9.8596
 NULL
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF WHERE key = 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF WHERE key = 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -630,6 +736,12 @@ STAGE PLANS:
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFBridge ==> log (Column[value], Const decimal(20,10) 10) because hive.vectorized.adaptor.usage.mode=none
+                vectorized: false
 
   Stage: Stage-0
     Fetch Operator
@@ -654,10 +766,14 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_udf
#### A masked pattern was here ####
 22026.465794806718	2.302585092994046	2.302585092994046	1.0	1.0	1.0	1.0	3.1622776601683795
-PREHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -685,6 +801,12 @@ STAGE PLANS:
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFPower(Column[key], Const int 2) because hive.vectorized.adaptor.usage.mode=chosen and the UDF wasn't one of the chosen ones
+                vectorized: false
 
   Stage: Stage-0
     Fetch Operator
@@ -738,20 +860,24 @@ POSTHOOK: Input: default@decimal_udf
 9.8596
 9.8596
 NULL
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF WHERE key = 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF WHERE key = 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -782,6 +908,12 @@ STAGE PLANS:
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFBridge ==> log (Column[value], Const decimal(20,10) 10) because hive.vectorized.adaptor.usage.mode=chosen and the UDF wasn't one of the chosen ones
+                vectorized: false
 
   Stage: Stage-0
     Fetch Operator
@@ -806,12 +938,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_udf
#### A masked pattern was here ####
 22026.465794806718	2.302585092994046	2.302585092994046	1.0	1.0	1.0	1.0	3.1622776601683795
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 SELECT key,
        COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok
 FROM count_case_groupby
 GROUP BY key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 SELECT key,
        COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok
 FROM count_case_groupby
 GROUP BY key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -847,17 +983,40 @@ STAGE PLANS:
                         value expressions: _col1 (type: bigint)
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFWhen(Column[bool], Const int 1, GenericUDFOPNot(Column[bool]), Const int 0, Const void null) because hive.vectorized.adaptor.usage.mode=none
+                vectorized: false
         Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 1) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: [0]
                keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -883,12 +1042,16 @@ key2	1
 key3	0
 key4	1
 key5	0
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 SELECT key,
        COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok
 FROM count_case_groupby
 GROUP BY key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 SELECT key,
        COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok
 FROM count_case_groupby
 GROUP BY key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -906,12 +1069,27 @@ STAGE PLANS:
                 TableScan
                   alias: count_case_groupby
                   Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: key (type: string), CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END (type: int)
                     outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 3]
+                        selectExpressions: VectorUDFAdaptor(CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END)(children: NotCol(col 1) -> 2:boolean) -> 3:int
                     Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count(_col1)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCount(col 3) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 0
+                          native: false
+                          projectedOutputColumns: [0]
                       keys: _col0 (type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1
@@ -920,21 +1098,50 @@ STAGE PLANS:
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 1) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: [0]
                keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out
index b093ded..8a23d6a 100644
--- ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out
+++ ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out
@@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem
 POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ]
 POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -124,12 +128,26 @@ STAGE PLANS:
                 TableScan
                   alias: vectortab2korc
                   Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
                   Select Operator
                     expressions: dc (type: decimal(38,18))
                     outputColumnNames: dc
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [6]
                     Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: min(dc), max(dc), sum(dc), avg(dc)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFMinDecimal(col 6) -> decimal(38,18), VectorUDAFMaxDecimal(col 6) -> decimal(38,18), VectorUDAFSumDecimal(col 6) -> decimal(38,18), VectorUDAFAvgDecimal(col 6) -> struct
+                          className: VectorGroupByOperator
+                          vectorOutput: false
+                          native: false
+                          projectedOutputColumns: [0, 1, 2, 3]
+                          vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 6) -> struct output type STRUCT requires PRIMITIVE IS false
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3
                       Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE
@@ -139,8 +157,21 @@ STAGE PLANS:
                         value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: struct)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3)
diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out
index fdd3d1b..0cf62d3 100644
--- ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out
+++ ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out
@@ -31,9 +31,9 @@ POSTHOOK: Output: default@testvec
 POSTHOOK: Lineage: testvec.dt EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
 POSTHOOK: Lineage: testvec.greg_dt SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
 POSTHOOK: Lineage: testvec.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-PREHOOK: query: explain select max(dt), max(greg_dt) from testvec where id=5
+PREHOOK: query: explain vectorization select max(dt), max(greg_dt) from testvec where id=5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select max(dt), max(greg_dt) from testvec where id=5
+POSTHOOK: query: explain vectorization select max(dt), max(greg_dt) from testvec where id=5
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
diff --git ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out
index bc37741..b51bed6 100644
--- ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out
+++ ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out
@@ -38,47 +38,113 @@ POSTHOOK: Input: default@src
 POSTHOOK: Output: default@tbl2
 POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select count(*) from (
   select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
 ) subq1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select count(*) from (
   select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
 ) subq1
 POSTHOOK: type: QUERY
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Reducer 2 vectorized, llap
-      File Output Operator [FS_22]
-        Group By Operator [GBY_21] (rows=1 width=8)
-          Output:["_col0"],aggregations:["count(VALUE._col0)"]
-        <-Map 1 [CUSTOM_SIMPLE_EDGE] llap
-          PARTITION_ONLY_SHUFFLE [RS_11]
-            Group By Operator [GBY_10] (rows=1 width=8)
-              Output:["_col0"],aggregations:["count()"]
-              Merge Join Operator [MERGEJOIN_19] (rows=11 width=93)
-                Conds:SEL_2._col0=SEL_5._col0(Inner)
-              <-Select Operator [SEL_5] (rows=10 width=93)
-                  Output:["_col0"]
-                  Filter Operator [FIL_18] (rows=10 width=93)
-                    predicate:key is not null
-                    TableScan [TS_3] (rows=10 width=93)
-                      default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"]
-              <-Select Operator [SEL_2] (rows=10 width=93)
-                  Output:["_col0"]
-                  Filter Operator [FIL_17] (rows=10 width=93)
-                    predicate:key is not null
-                    TableScan [TS_0] (rows=10 width=93)
-                      default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"]
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                      Merge Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: count()
+                          Group By Vectorization:
+                              vectorOutput: false
+                              native: false
+                              projectedOutputColumns: null
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: bigint)
+            Execution mode: llap
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 
 PREHOOK: query: select count(*) from (
   select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
@@ -95,7 +161,7 @@ POSTHOOK: Input: default@tbl1
 POSTHOOK: Input: default@tbl2
#### A masked pattern was here ####
 22
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select count(*) from
 (
   select key, count(*) from
@@ -105,7 +171,7 @@ select count(*) from
   group by key
 ) subq2
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select count(*) from
 (
   select key, count(*) from
@@ -115,46 +181,153 @@ select count(*) from
   group by key
 ) subq2
 POSTHOOK: type: QUERY
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
-Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                      Merge Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          Group By Vectorization:
+                              vectorOutput: false
+                              native: false
+                              projectedOutputColumns: null
+                          keys: _col0 (type: int)
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            key expressions: _col0 (type: int)
+                            sort order: +
+                            Map-reduce partition columns: _col0 (type: int)
+                            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: llap
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: []
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: []
+                  Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: count()
+                    Group By Vectorization:
+                        aggregators: VectorUDAFCountStar(*) -> bigint
+                        className: VectorGroupByOperator
+                        vectorOutput: true
+                        native: false
+                        projectedOutputColumns: [0]
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Reducer 3 vectorized, llap
-      File Output Operator [FS_31]
-        Group By Operator [GBY_30] (rows=1 width=8)
-          Output:["_col0"],aggregations:["count(VALUE._col0)"]
-        <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized, llap
-          PARTITION_ONLY_SHUFFLE [RS_29]
-            Group By Operator [GBY_28] (rows=1 width=8)
-              Output:["_col0"],aggregations:["count()"]
-              Select Operator [SEL_27] (rows=5 width=93)
-                Group By Operator [GBY_26] (rows=5 width=93)
-                  Output:["_col0"],keys:KEY._col0
-                <-Map 1 [SIMPLE_EDGE] llap
-                  SHUFFLE [RS_11]
-                    PartitionCols:_col0
-                    Group By Operator [GBY_10] (rows=11 width=93)
-                      Output:["_col0"],keys:_col0
-                      Merge Join Operator [MERGEJOIN_24] (rows=11 width=93)
-                        Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0"]
-                      <-Select Operator [SEL_5] (rows=10 width=93)
-                          Output:["_col0"]
-                          Filter Operator [FIL_23] (rows=10 width=93)
-                            predicate:key is not null
-                            TableScan [TS_3] (rows=10 width=93)
-                              default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"]
-                      <-Select Operator [SEL_2] (rows=10 width=93)
-                          Output:["_col0"]
-                          Filter Operator [FIL_22] (rows=10 width=93)
-                            predicate:key is not null
-                            TableScan [TS_0] (rows=10 width=93)
-                              default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"]
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 
 PREHOOK: query: select count(*) from
 (
@@ -181,7 +354,7 @@ POSTHOOK: Input: default@tbl1
 POSTHOOK: Input: default@tbl2
#### A masked pattern was here ####
 6
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select src1.key, src1.cnt1, src2.cnt1 from
 (
   select key, count(*) as cnt1 from
@@ -198,7 +371,7 @@ join
 ) src2 on src1.key = src2.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select src1.key, src1.cnt1, src2.cnt1 from
 (
   select key, count(*) as cnt1 from
@@ -215,71 +388,214 @@ join
 ) src2 on src1.key = src2.key
 POSTHOOK: type: QUERY
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
-Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
-Reducer 6 <- Map 5 (SIMPLE_EDGE)
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Reducer 3 llap
-      File Output Operator [FS_32]
-        Select Operator [SEL_31] (rows=5 width=102)
-          Output:["_col0","_col1","_col2"]
-          Merge Join Operator [MERGEJOIN_49] (rows=5 width=102)
-            Conds:RS_51._col0=RS_53._col0(Inner),Output:["_col0","_col1","_col3"]
-          <-Reducer 2 [SIMPLE_EDGE] vectorized, llap
-            SHUFFLE [RS_51]
-              PartitionCols:_col0
-              Group By Operator [GBY_50] (rows=5 width=93)
-                Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
-              <-Map 1 [SIMPLE_EDGE] llap
-                SHUFFLE [RS_11]
-                  PartitionCols:_col0
-                  Group By Operator [GBY_10] (rows=11 width=93)
-                    Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
-                    Merge Join Operator [MERGEJOIN_45] (rows=11 width=93)
-                      Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0"]
-                    <-Select Operator [SEL_5] (rows=10 width=93)
-                        Output:["_col0"]
-                        Filter Operator [FIL_42] (rows=10 width=93)
-                          predicate:key is not null
-                          TableScan [TS_3] (rows=10 width=93)
-                            default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"]
-                    <-Select Operator [SEL_2] (rows=10 width=93)
-                        Output:["_col0"]
-                        Filter Operator [FIL_41] (rows=10 width=93)
-                          predicate:key is not null
-                          TableScan [TS_0] (rows=10 width=93)
-                            default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"]
-          <-Reducer 6 [SIMPLE_EDGE] vectorized, llap
-            SHUFFLE [RS_53]
-              PartitionCols:_col0
-              Group By Operator [GBY_52] (rows=5 width=93)
-                Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
-              <-Map 5 [SIMPLE_EDGE] llap
-                SHUFFLE [RS_25]
-                  PartitionCols:_col0
-                  Group By Operator [GBY_24] (rows=11 width=93)
-                    Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
-                    Merge Join Operator [MERGEJOIN_47] (rows=11 width=93)
-                      Conds:SEL_16._col0=SEL_19._col0(Inner),Output:["_col0"]
-                    <-Select Operator [SEL_19] (rows=10 width=93)
-                        Output:["_col0"]
-                        Filter Operator [FIL_44] (rows=10 width=93)
-                          predicate:key is not null
-                          TableScan [TS_17] (rows=10 width=93)
-                            default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"]
-                    <-Select Operator [SEL_16] (rows=10 width=93)
-                        Output:["_col0"]
-                        Filter Operator [FIL_43] (rows=10 width=93)
-                          predicate:key is not null
-                          TableScan [TS_14] (rows=10 width=93)
-                            default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"]
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+        Reducer 6 <- Map 5 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                      Merge Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: count()
+                          Group By Vectorization:
+                              vectorOutput: false
+                              native: false
+                              projectedOutputColumns: null
+                          keys: _col0 (type: int)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            key expressions: _col0 (type: int)
+                            sort order: +
+                            Map-reduce partition columns: _col0 (type: int)
+                            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col1 (type: bigint)
+            Execution mode: llap
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                      Merge Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: count()
+                          Group By Vectorization:
+                              vectorOutput: false
+                              native: false
+                              projectedOutputColumns: null
+                          keys: _col0 (type: int)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            key expressions: _col0 (type: int)
+                            sort order: +
+                            Map-reduce partition columns: _col0 (type: int)
+                            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col1 (type: bigint)
+            Execution mode: llap
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 1) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: [0]
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 6 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 1) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: [0]
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE
or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from ( @@ -325,51 +641,117 @@ POSTHOOK: Input: default@tbl2 5 9 9 8 1 1 9 1 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 on subq1.key = subq2.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 on subq1.key = subq2.key POSTHOOK: type: QUERY -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + 
vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_22] - Group By Operator [GBY_21] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=3 width=102) - Conds:SEL_2._col0=SEL_5._col0(Inner) - <-Select Operator [SEL_5] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_18] (rows=3 width=93) - predicate:(key < 6) - TableScan [TS_3] (rows=10 width=93) - default@tbl2,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_17] (rows=3 width=93) - predicate:(key < 6) - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 @@ -390,7 +772,7 @@ POSTHOOK: Input: default@tbl1 POSTHOOK: Input: default@tbl2 #### A masked pattern was here #### 20 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(*) from ( select * from @@ -402,7 +784,7 @@ select count(*) from join tbl2 b on subq2.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(*) from ( select * from @@ -414,37 +796,103 @@ select count(*) from join tbl2 b on subq2.key = b.key POSTHOOK: type: QUERY -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_22] - Group By Operator [GBY_21] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=1 width=102) - Conds:SEL_2._col0=SEL_5._col0(Inner) - <-Select Operator [SEL_5] (rows=1 width=93) - Output:["_col0"] - Filter Operator [FIL_18] (rows=1 width=93) - predicate:((key < 8) and (key < 6)) - TableScan [TS_3] (rows=10 width=93) - default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=1 width=93) - Output:["_col0"] - Filter Operator [FIL_17] (rows=1 width=93) - predicate:((key < 8) and (key < 6)) - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 
Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from ( @@ -475,7 +923,7 @@ POSTHOOK: Input: default@tbl1 POSTHOOK: Input: default@tbl2 #### A masked pattern was here #### 20 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(*) from ( select * from @@ -494,7 +942,7 @@ select count(*) from ) subq4 on subq2.key = subq4.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(*) from ( select * from @@ -513,37 +961,103 @@ select count(*) from ) subq4 on subq2.key = subq4.key POSTHOOK: type: QUERY -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint 
+ className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_22] - Group By Operator [GBY_21] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=1 width=102) - Conds:SEL_2._col0=SEL_5._col0(Inner) - <-Select Operator [SEL_5] (rows=1 width=93) - Output:["_col0"] - Filter Operator [FIL_18] (rows=1 width=93) - predicate:((key < 8) and (key < 6)) - TableScan [TS_3] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=1 width=93) - Output:["_col0"] - Filter Operator [FIL_17] (rows=1 width=93) - predicate:((key < 8) and (key < 6)) - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from ( @@ -586,51 +1100,117 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl1 #### A masked pattern was here #### 20 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 join (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 on subq1.key = subq2.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 join (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 on subq1.key = subq2.key POSTHOOK: type: QUERY -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_22] - Group By Operator [GBY_21] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=3 width=102) - Conds:SEL_2._col0=SEL_5._col0(Inner) - <-Select Operator [SEL_5] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_18] (rows=3 width=93) - predicate:(key < 8) - TableScan [TS_3] (rows=10 width=93) - default@tbl2,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_17] (rows=3 width=93) - predicate:(key < 8) - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 8) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 8) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output 
Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 @@ -651,58 +1231,183 @@ POSTHOOK: Input: default@tbl1 POSTHOOK: Input: default@tbl2 #### A masked pattern was here #### 20 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 join (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 on subq1.key = subq2.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 join (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 on subq1.key = subq2.key POSTHOOK: type: QUERY -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2)(children: LongColAddLongScalar(col 0, val 1) -> 2:long) -> boolean + predicate: (key + 1) is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: LongColAddLongScalar(col 0, val 1) -> 2:long + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2)(children: LongColAddLongScalar(col 0, val 1) -> 2:long) -> boolean + predicate: (key + 1) is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: LongColAddLongScalar(col 0, val 1) -> 2:long + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 3 vectorized, llap - File Output Operator [FS_29] - Group By Operator [GBY_28] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_21] (rows=11 width=93) - Conds:RS_24._col0=RS_27._col0(Inner) - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_24] - PartitionCols:_col0 - Select Operator [SEL_23] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_22] (rows=10 width=93) - predicate:(key + 1) is not null - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Map 4 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_27] - PartitionCols:_col0 - Select Operator [SEL_26] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_25] (rows=10 width=93) - predicate:(key + 1) is not null - TableScan [TS_3] (rows=10 width=93) - default@tbl2,a,Tbl:COMPLETE,Col:NONE,Output:["key"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 @@ -723,47 +1428,113 @@ POSTHOOK: Input: default@tbl1 POSTHOOK: Input: default@tbl2 #### A masked pattern was here #### 22 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key POSTHOOK: type: QUERY -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_22] - Group By Operator [GBY_21] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=3 width=102) - Conds:SEL_2._col0=SEL_5._col0(Inner) - <-Select Operator [SEL_5] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_18] (rows=3 width=93) - predicate:(key < 6) - TableScan [TS_3] (rows=10 width=93) - default@tbl2,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_17] (rows=3 width=93) - predicate:(key < 6) - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output 
Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 @@ -780,7 +1551,7 @@ POSTHOOK: Input: default@tbl1 POSTHOOK: Input: default@tbl2 #### A masked pattern was here #### 20 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -790,7 +1561,7 @@ select count(*) from (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 on (subq1.key = subq3.key) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -800,43 +1571,116 @@ select count(*) from (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 on (subq1.key = subq3.key) POSTHOOK: type: QUERY -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + Statistics: Num rows: 6 Data size: 613 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0 + Statistics: Num 
rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_32] - Group By Operator [GBY_31] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_15] - Group By Operator [GBY_14] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_28] (rows=6 width=102) - Conds:SEL_2._col0=SEL_5._col0(Inner),SEL_2._col0=SEL_8._col0(Inner) - <-Select Operator [SEL_5] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_26] (rows=3 width=93) - predicate:(key < 6) - TableScan [TS_3] (rows=10 width=93) - default@tbl2,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_8] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_27] (rows=3 width=93) - predicate:(key < 6) - TableScan [TS_6] (rows=10 width=93) - default@tbl2,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_25] (rows=3 width=93) - predicate:(key < 6) - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 @@ -863,7 +1707,7 @@ POSTHOOK: Input: default@tbl1 POSTHOOK: Input: default@tbl2 #### A masked pattern was here #### 56 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(*) from ( select subq2.key as key, subq2.value as value1, b.value as value2 from ( @@ -876,7 +1720,7 @@ select count(*) from ( join tbl2 b on subq2.key = b.key) a PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(*) from ( select subq2.key as key, subq2.value as value1, b.value as value2 from ( @@ -889,37 +1733,103 @@ select count(*) from ( join tbl2 b on subq2.key = b.key) a POSTHOOK: type: QUERY -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_22] - Group By Operator [GBY_21] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=1 width=102) - Conds:SEL_2._col0=SEL_5._col0(Inner) - <-Select Operator [SEL_5] (rows=1 width=93) - Output:["_col0"] - Filter Operator [FIL_18] (rows=1 width=93) - predicate:((key < 8) and (key < 6)) - TableScan [TS_3] (rows=10 width=93) - default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=1 width=93) - Output:["_col0"] - Filter Operator [FIL_17] (rows=1 width=93) - predicate:((key < 8) and (key < 6)) - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 
Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from ( select subq2.key as key, subq2.value as value1, b.value as value2 from @@ -968,60 +1878,122 @@ POSTHOOK: query: CREATE TABLE dest2(key int, val1 string, val2 string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest2 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 insert overwrite table dest1 select key, val1 insert overwrite table dest2 select key, val1, val2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 insert overwrite table dest1 select key, val1 insert overwrite table dest2 select key, val1, val2 POSTHOOK: type: QUERY -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 1023 
Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + Execution mode: llap -Stage-4 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.dest1"} - Stage-3 - Dependency Collection{} - Stage-2 - Map 1 llap - File Output Operator [FS_11] - table:{"name:":"default.dest1"} - Select Operator [SEL_10] (rows=11 width=93) - Output:["_col0","_col1"] - Select Operator [SEL_9] (rows=11 width=93) - Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_18] (rows=11 width=93) - Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1","_col3"] - <-Select Operator [SEL_5] (rows=10 width=93) - Output:["_col0","_col1"] - Filter Operator [FIL_17] (rows=10 width=93) - predicate:key is not null - TableScan [TS_3] (rows=10 width=93) - default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_2] (rows=10 width=93) - Output:["_col0","_col1"] - Filter Operator [FIL_16] (rows=10 width=93) - predicate:key is not null - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - File Output Operator [FS_13] - table:{"name:":"default.dest2"} - Please refer to the previous Select Operator [SEL_9] -Stage-5 - Stats-Aggr Operator - Stage-1 - Move Operator - table:{"name:":"default.dest2"} - Please refer to the previous Stage-3 + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator PREHOOK: query: from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key @@ -1124,70 +2096,171 @@ POSTHOOK: query: CREATE TABLE dest2(key int, cnt int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest2 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 insert overwrite table dest1 select key, val1 insert overwrite table dest2 select key, count(*) group by key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 insert overwrite table dest1 select key, val1 insert overwrite table dest2 select key, count(*) group by key POSTHOOK: type: QUERY -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) 
(type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 -Stage-4 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.dest1"} - Stage-3 - Dependency Collection{} - Stage-2 - Reducer 2 vectorized, llap - File Output Operator [FS_27] - table:{"name:":"default.dest2"} - Select Operator [SEL_26] (rows=5 width=93) - Output:["_col0","_col1"] - Group By Operator [GBY_25] (rows=5 width=93) - Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] llap - File Output Operator [FS_11] - table:{"name:":"default.dest1"} - Merge Join Operator [MERGEJOIN_23] (rows=11 width=93) - Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_5] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_22] (rows=10 width=93) - predicate:key is not null - TableScan [TS_3] (rows=10 width=93) - default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=10 width=93) - Output:["_col0","_col1"] - Filter Operator [FIL_21] (rows=10 width=93) - predicate:key is not null - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - SHUFFLE [RS_14] - PartitionCols:_col0 - Group By Operator [GBY_13] (rows=11 width=93) - Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_12] (rows=11 width=93) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_23] -Stage-5 - Stats-Aggr Operator - Stage-1 - Move Operator - table:{"name:":"default.dest2"} - Please refer to the previous Stage-3 + Stage: Stage-5 + Stats-Aggr Operator PREHOOK: query: from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key diff --git ql/src/test/results/clientpositive/llap/vector_between_columns.q.out ql/src/test/results/clientpositive/llap/vector_between_columns.q.out index 1ed359f..83891ad 100644 --- ql/src/test/results/clientpositive/llap/vector_between_columns.q.out +++ ql/src/test/results/clientpositive/llap/vector_between_columns.q.out @@ -61,13 +61,17 @@ POSTHOOK: Lineage: tint.cint SIMPLE [(tint_txt)tint_txt.FieldSchema(name:cint, t POSTHOOK: Lineage: tint.rnum SIMPLE 
[(tint_txt)tint_txt.FieldSchema(name:rnum, type:int, comment:null), ] tint_txt.rnum tint_txt.cint Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -85,9 +89,16 @@ STAGE PLANS: TableScan alias: tint Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: rnum (type: int), cint (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -95,6 +106,10 @@ STAGE PLANS: keys: 0 1 + Map Join Vectorization: + className: VectorMapJoinInnerMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -102,9 +117,17 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1, 3, 5] + selectExpressions: VectorUDFAdaptor(CASE WHEN (_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END)(children: VectorUDFAdaptor(_col1 BETWEEN _col3 AND _col3) -> 4:boolean) -> 5:string Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -112,21 +135,49 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Map 2 Map Operator Tree: TableScan alias: tsint Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: rnum (type: int), 
csint (type: smallint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: smallint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -172,13 +223,17 @@ tint.rnum tsint.rnum tint.cint tsint.csint between_col 4 3 10 1 NoOk 4 4 10 10 Ok Warning: Map Join MAPJOIN[10][bigTable=?] in task 'Map 1' is a cross product -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -196,9 +251,16 @@ STAGE PLANS: TableScan alias: tint Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: rnum (type: int), cint (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -206,19 +268,34 @@ STAGE PLANS: keys: 0 1 + Map Join Vectorization: + className: VectorMapJoinInnerMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 4)(children: VectorUDFAdaptor(_col1 BETWEEN _col3 AND _col3) -> 4:boolean) -> boolean predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean) Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col2 
(type: int), _col1 (type: int), _col3 (type: smallint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1, 3] Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -226,21 +303,49 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Map 2 Map Operator Tree: TableScan alias: tsint Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: rnum (type: int), csint (type: smallint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: smallint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_between_in.q.out ql/src/test/results/clientpositive/llap/vector_between_in.q.out index 74384a4..8a4383e 100644 --- ql/src/test/results/clientpositive/llap/vector_between_in.q.out +++ ql/src/test/results/clientpositive/llap/vector_between_in.q.out @@ -12,10 +12,14 @@ POSTHOOK: Lineage: decimal_date_test.cdate EXPRESSION [(alltypesorc)alltypesorc. 
POSTHOOK: Lineage: decimal_date_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_date_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_date_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] -PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -33,28 +37,66 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnInList(col 3, values [-67, -171]) -> boolean predicate: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] 
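(Editor's note on the FilterLongColumnInList values above: [-67, -171] — and [-67, -171, 20] in the NOT IN plan that follows — are the date literals compiled down to epoch days, i.e. days since 1970-01-01, which is how the vectorized path represents DATE values in a long column vector; likewise the BETWEEN plans further down use left/right bounds such as -2 and 1 for 1969-12-30 and 1970-01-02. A self-contained sketch of that encoding using only java.time; the class name is illustrative:

    import java.time.LocalDate;

    final class EpochDays {
      // Days since 1970-01-01, matching the long constants the vectorized
      // filters compare against.
      static long toEpochDay(String isoDate) {
        return LocalDate.parse(isoDate).toEpochDay();
      }

      public static void main(String[] args) {
        System.out.println(toEpochDay("1969-10-26")); // -67
        System.out.println(toEpochDay("1969-07-14")); // -171
        System.out.println(toEpochDay("1970-01-21")); // 20
      }
    }
)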
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -67,10 +109,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -88,32 +134,78 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsFalse(col 4)(children: LongColumnInList(col 3, values [-67, -171, 20]) -> 4:boolean) -> boolean predicate: (not (cdate) IN (1969-10-26, 1969-07-14, 1970-01-21)) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -126,10 +218,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -147,28 +243,66 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> boolean predicate: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdecimal1 (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(20,10)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -181,10 +315,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -202,32 +340,78 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsFalse(col 4)(children: DecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 4:boolean) -> boolean predicate: (not (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568)) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -240,10 +424,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -261,28 +449,66 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnBetween(col 3, left -2, right 1) -> boolean predicate: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean) Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets 
IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -295,10 +521,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -316,28 +546,66 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnNotBetween(col 3, left -610, right 608) -> boolean predicate: cdate NOT BETWEEN 1968-05-01 AND 1971-09-01 (type: boolean) Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -350,10 +618,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -371,28 +643,66 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnBetween(col 1, left -20, right 45.9918918919) -> boolean predicate: cdecimal1 BETWEEN -20 AND 45.9918918919 (type: boolean) Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdecimal1 (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(20,10)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not 
ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -405,10 +715,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -426,32 +740,78 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnNotBetween(col 1, left -2000, right 4390.1351351351) -> boolean predicate: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean) Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: 
VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -709,10 +1069,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_date_test #### A masked pattern was here #### 6172 -PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -731,12 +1095,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + selectExpressions: LongColumnInList(col 3, values [-67, -171]) -> 4:boolean Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By 
Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 4 + native: false + projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -745,15 +1124,41 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 @@ -761,17 +1166,36 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -784,10 +1208,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -806,12 +1234,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + selectExpressions: DecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 4:boolean Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 4 + native: false + projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -820,15 +1263,41 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + 
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 1) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: [0]
                 keys: KEY._col0 (type: boolean)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
@@ -836,17 +1305,36 @@ STAGE PLANS:
                 Reduce Output Operator
                   key expressions: _col0 (type: boolean)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
                   Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: bigint)
         Reducer 3 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint)
                 outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1]
                 Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -859,10 +1347,14 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -881,12 +1373,27 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_date_test
                   Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                   Select Operator
                     expressions: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean)
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [4]
+                        selectExpressions: VectorUDFAdaptor(cdate BETWEEN 1969-12-30 AND 1970-01-02) -> 4:boolean
                     Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count(1)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 4
+                          native: false
+                          projectedOutputColumns: [0]
                       keys: _col0 (type: boolean)
                       mode: hash
                       outputColumnNames: _col0, _col1
@@ -895,15 +1402,41 @@ STAGE PLANS:
                         key expressions: _col0 (type: boolean)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: boolean)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 1) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: [0]
                 keys: KEY._col0 (type: boolean)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
@@ -911,17 +1444,36 @@ STAGE PLANS:
                 Reduce Output Operator
                   key expressions: _col0 (type: boolean)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
                   Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: bigint)
         Reducer 3 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint)
                 outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1]
                 Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -934,10 +1486,14 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -956,12 +1512,27 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_date_test
                   Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                   Select Operator
                     expressions: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean)
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [4]
+                        selectExpressions: VectorUDFAdaptor(cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351) -> 4:boolean
                     Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count(1)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 4
+                          native: false
+                          projectedOutputColumns: [0]
                       keys: _col0 (type: boolean)
                       mode: hash
                       outputColumnNames: _col0, _col1
@@ -970,15 +1541,41 @@ STAGE PLANS:
                         key expressions: _col0 (type: boolean)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: boolean)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 1) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: [0]
                 keys: KEY._col0 (type: boolean)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
@@ -986,17 +1583,36 @@ STAGE PLANS:
                 Reduce Output Operator
                   key expressions: _col0 (type: boolean)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
                   Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: bigint)
         Reducer 3 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint)
                 outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1]
                 Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
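The three plans above follow one pattern; a minimal sketch of how such a plan is produced (query and setting name taken verbatim from the golden file above, the SET line itself is illustrative only):

    -- illustrative sketch, not part of the patch
    SET hive.vectorized.execution.enabled=true;
    EXPLAIN VECTORIZATION EXPRESSION
    SELECT c0, count(1)
    FROM (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0
          FROM decimal_date_test) tab
    GROUP BY c0 ORDER BY c0;

Note that BETWEEN on the date column still goes through VectorUDFAdaptor rather than a native expression, which is why the map side reports usesVectorUDFAdaptor: true.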
diff --git ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
index 7de04a7..615c31a 100644
--- ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
+++ ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
@@ -95,14 +95,18 @@ POSTHOOK: Lineage: hundredorc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:
 POSTHOOK: Lineage: hundredorc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ]
 POSTHOOK: Lineage: hundredorc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: hundredorc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ]
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT sum(hash(*))
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT sum(hash(*))
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -121,12 +125,23 @@ STAGE PLANS:
                 TableScan
                   alias: t1
                   Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 10) -> boolean
                     predicate: bin is not null (type: boolean)
                     Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                       Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
@@ -134,6 +149,10 @@ STAGE PLANS:
                         keys:
                           0 _col10 (type: binary)
                           1 _col10 (type: binary)
+                        Map Join Vectorization:
+                            className: VectorMapJoinInnerStringOperator
+                            native: true
+                            nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
                         input vertices:
                           1 Map 3
@@ -141,48 +160,111 @@ STAGE PLANS:
                         Select Operator
                           expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21) (type: int)
                           outputColumnNames: _col0
+                          Select Vectorization:
+                              className: VectorSelectOperator
+                              native: true
+                              projectedOutputColumns: [21]
+                              selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21)) -> 21:int
                           Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
                           Group By Operator
                             aggregations: sum(_col0)
+                            Group By Vectorization:
+                                aggregators: VectorUDAFSumLong(col 21) -> bigint
+                                className: VectorGroupByOperator
+                                vectorOutput: true
+                                native: false
+                                projectedOutputColumns: [0]
                             mode: hash
                             outputColumnNames: _col0
                             Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                             Reduce Output Operator
                               sort order: 
+                              Reduce Sink Vectorization:
+                                  className: VectorReduceSinkOperator
+                                  native: false
+                                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                                  nativeConditionsNotMet: Uniform Hash IS false
                               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                               value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Map 3 
             Map Operator Tree:
                 TableScan
                   alias: t2
                   Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 10) -> boolean
                     predicate: bin is not null (type: boolean)
                     Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                       Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col10 (type: binary)
                         sort order: +
                         Map-reduce partition columns: _col10 (type: binary)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2))
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -206,16 +288,20 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@hundredorc
 #### A masked pattern was here ####
 -27832781952
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT count(*), bin
 FROM hundredorc
 GROUP BY bin
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT count(*), bin
 FROM hundredorc
 GROUP BY bin
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -233,12 +319,26 @@ STAGE PLANS:
                 TableScan
                   alias: hundredorc
                   Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                   Select Operator
                     expressions: bin (type: binary)
                     outputColumnNames: bin
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [10]
                     Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 10
+                          native: false
+                          projectedOutputColumns: [0]
                       keys: bin (type: binary)
                       mode: hash
                       outputColumnNames: _col0, _col1
@@ -247,15 +347,41 @@ STAGE PLANS:
                         key expressions: _col0 (type: binary)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: binary)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 1) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: [0]
                 keys: KEY._col0 (type: binary)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
@@ -263,9 +389,16 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col1 (type: bigint), _col0 (type: binary)
                   outputColumnNames: _col0, _col1
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [1, 0]
                   Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -316,14 +449,18 @@ POSTHOOK: Input: default@hundredorc
 3	xylophone band
 2	yard duty
 3	zync studies
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT t1.i, t1.bin, t2.bin
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT t1.i, t1.bin, t2.bin
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -341,12 +478,23 @@ STAGE PLANS:
                 TableScan
                   alias: t1
                   Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 2) -> boolean
                     predicate: i is not null (type: boolean)
                     Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: i (type: int), bin (type: binary)
                       outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [2, 10]
                       Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
@@ -354,6 +502,10 @@ STAGE PLANS:
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
+                        Map Join Vectorization:
+                            className: VectorMapJoinInnerLongOperator
+                            native: true
+                            nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
                         outputColumnNames: _col0, _col1, _col3
                         input vertices:
                           1 Map 2
@@ -361,9 +513,16 @@ STAGE PLANS:
                         Select Operator
                           expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary)
                           outputColumnNames: _col0, _col1, _col2
+                          Select Vectorization:
+                              className: VectorSelectOperator
+                              native: true
+                              projectedOutputColumns: [2, 10, 11]
                           Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
                           File Output Operator
                             compressed: false
+                            File Sink Vectorization:
+                                className: VectorFileSinkOperator
+                                native: false
                             Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
                             table:
                                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -371,26 +530,57 @@ STAGE PLANS:
                                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 2 
            Map Operator Tree:
                 TableScan
                   alias: t2
                   Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 2) -> boolean
                     predicate: i is not null (type: boolean)
                     Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: i (type: int), bin (type: binary)
                       outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [2, 10]
                       Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
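A hedged sketch of the settings behind the VectorMapJoinInnerStringOperator and VectorMapJoinInnerLongOperator lines above; the two SET statements name exactly the first conditions listed in nativeConditionsMet, and the query is the one from the golden file:

    -- illustrative sketch, not part of the patch
    SET hive.mapjoin.optimized.hashtable=true;
    SET hive.vectorized.execution.mapjoin.native.enabled=true;
    EXPLAIN VECTORIZATION EXPRESSION
    SELECT sum(hash(*)) FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin;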
diff --git ql/src/test/results/clientpositive/llap/vector_bround.q.out ql/src/test/results/clientpositive/llap/vector_bround.q.out
index 431b91a..d463f1a 100644
--- ql/src/test/results/clientpositive/llap/vector_bround.q.out
+++ ql/src/test/results/clientpositive/llap/vector_bround.q.out
@@ -32,19 +32,22 @@ POSTHOOK: type: QUERY
 POSTHOOK: Output: default@test_vector_bround
 POSTHOOK: Lineage: test_vector_bround.v0 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 POSTHOOK: Lineage: test_vector_bround.v1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-PREHOOK: query: explain select bround(v0), bround(v1, 1) from test_vector_bround
+PREHOOK: query: explain vectorization select bround(v0), bround(v1, 1) from test_vector_bround
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select bround(v0), bround(v1, 1) from test_vector_bround
+POSTHOOK: query: explain vectorization select bround(v0), bround(v1, 1) from test_vector_bround
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
 
 Stage-0
   Fetch Operator
     limit:-1
-    Select Operator [SEL_1]
-      Output:["_col0","_col1"]
-      TableScan [TS_0]
-        Output:["v0","v1"]
+    Stage-1
+      Map 1 vectorized, llap
+      File Output Operator [FS_4]
+        Select Operator [SEL_3] (rows=8 width=16)
+          Output:["_col0","_col1"]
+          TableScan [TS_0] (rows=8 width=16)
+            default@test_vector_bround,test_vector_bround,Tbl:COMPLETE,Col:NONE,Output:["v0","v1"]
 
 PREHOOK: query: select bround(v0), bround(v1, 1) from test_vector_bround
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/llap/vector_bucket.q.out ql/src/test/results/clientpositive/llap/vector_bucket.q.out
index 0573abf..7917ffd 100644
--- ql/src/test/results/clientpositive/llap/vector_bucket.q.out
+++ ql/src/test/results/clientpositive/llap/vector_bucket.q.out
@@ -6,12 +6,16 @@ POSTHOOK: query: CREATE TABLE non_orc_table(a INT, b STRING) CLUSTERED BY(a) INT
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@non_orc_table
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three')
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three')
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -42,15 +46,34 @@ STAGE PLANS:
                         value expressions: _col0 (type: string), _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string)
                 outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [2, 1]
+                    selectExpressions: VectorUDFAdaptor(UDFToInteger(VALUE._col0)) -> 2:int
                 Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
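The two files above exercise different detail levels of the new syntax: plain EXPLAIN VECTORIZATION keeps the condensed CBO rendering and only adds vertex-level annotations, while EXPLAIN VECTORIZATION EXPRESSION prints the per-operator blocks. A sketch of both forms, using the queries verbatim from the golden files above:

    -- illustrative sketch, not part of the patch
    EXPLAIN VECTORIZATION
    select bround(v0), bround(v1, 1) from test_vector_bround;
    EXPLAIN VECTORIZATION EXPRESSION
    insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three');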
diff --git ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
index 9a2456f..31452c2 100644
--- ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
+++ ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
@@ -95,20 +95,24 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s
 POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ]
 POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ]
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
   i,
   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
   AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
   FROM over1korc GROUP BY i ORDER BY i LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
   i,
   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
   AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
   FROM over1korc GROUP BY i ORDER BY i LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -127,12 +131,27 @@ STAGE PLANS:
                 TableScan
                   alias: over1korc
                   Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                   Select Operator
                     expressions: i (type: int)
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [2]
                     Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: avg(50), avg(50.0), avg(50)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct, VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct
+                          className: VectorGroupByOperator
+                          vectorOutput: false
+                          keyExpressions: col 2
+                          native: false
+                          projectedOutputColumns: [0, 1, 2]
+                          vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct output type STRUCT requires PRIMITIVE IS false
                       keys: _col0 (type: int)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3
@@ -146,8 +165,21 @@ STAGE PLANS:
                         value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2)
@@ -163,16 +195,33 @@ STAGE PLANS:
                   value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4))
         Reducer 3 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4))
                outputColumnNames: _col0, _col1, _col2, _col3
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3]
                 Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 10
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                   Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
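The plan above is the one place in this batch where a reducer stays row-mode: the avg() partials travel as struct values, and the notVectorizedReason line spells that out. A sketch of the probe (query verbatim from the golden file; the SET line is illustrative only):

    -- illustrative sketch, not part of the patch
    SET hive.vectorized.execution.reduce.enabled=true;
    EXPLAIN VECTORIZATION EXPRESSION SELECT
      i,
      AVG(CAST(50 AS INT)) AS `avg_int_ok`,
      AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
      AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
      FROM over1korc GROUP BY i ORDER BY i LIMIT 10;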
diff --git ql/src/test/results/clientpositive/llap/vector_char_2.q.out ql/src/test/results/clientpositive/llap/vector_char_2.q.out
index bdcf7d8..3da086a 100644
--- ql/src/test/results/clientpositive/llap/vector_char_2.q.out
+++ ql/src/test/results/clientpositive/llap/vector_char_2.q.out
@@ -47,18 +47,22 @@ val_10	10	1
 val_100	200	2
 val_103	206	2
 val_104	208	2
-PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+PREHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows
 from char_2
 group by value
 order by value asc
 limit 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+POSTHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows
 from char_2
 group by value
 order by value asc
 limit 5
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -77,12 +81,27 @@ STAGE PLANS:
                 TableScan
                   alias: char_2
                   Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: value (type: char(20)), UDFToInteger(key) (type: int)
                     outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [1, 2]
+                        selectExpressions: VectorUDFAdaptor(UDFToInteger(key)) -> 2:int
                     Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: sum(_col1), count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 1
+                          native: false
+                          projectedOutputColumns: [0, 1]
                       keys: _col0 (type: char(20))
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2
@@ -91,16 +110,43 @@ STAGE PLANS:
                         key expressions: _col0 (type: char(20))
                         sort order: +
                         Map-reduce partition columns: _col0 (type: char(20))
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: No TopN IS false
                         Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col1 (type: bigint), _col2 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0), count(VALUE._col1)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 1) -> bigint, VectorUDAFCountMerge(col 2) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: [0, 1]
                 keys: KEY._col0 (type: char(20))
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2
@@ -108,21 +154,43 @@ STAGE PLANS:
                 Reduce Output Operator
                   key expressions: _col0 (type: char(20))
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                   Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
                   TopN Hash Memory Usage: 0.1
                   value expressions: _col1 (type: bigint), _col2 (type: bigint)
         Reducer 3 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
                outputColumnNames: _col0, _col1, _col2
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2]
                 Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 5
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                   Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -177,18 +245,22 @@ val_97	194	2
 val_96	96	1
 val_95	190	2
 val_92	92	1
-PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+PREHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows
 from char_2
 group by value
 order by value desc
 limit 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+POSTHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows
 from char_2
 group by value
 order by value desc
 limit 5
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -207,12 +279,27 @@ STAGE PLANS:
                 TableScan
                   alias: char_2
                   Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: value (type: char(20)), UDFToInteger(key) (type: int)
                     outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [1, 2]
+                        selectExpressions: VectorUDFAdaptor(UDFToInteger(key)) -> 2:int
                     Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: sum(_col1), count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 1
+                          native: false
+                          projectedOutputColumns: [0, 1]
                       keys: _col0 (type: char(20))
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2
@@ -221,16 +308,43 @@ STAGE PLANS:
                         key expressions: _col0 (type: char(20))
                         sort order: -
                         Map-reduce partition columns: _col0 (type: char(20))
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: No TopN IS false
                         Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col1 (type: bigint), _col2 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0), count(VALUE._col1)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 1) -> bigint, VectorUDAFCountMerge(col 2) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: [0, 1]
                 keys: KEY._col0 (type: char(20))
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2
@@ -238,21 +352,43 @@ STAGE PLANS:
                 Reduce Output Operator
                   key expressions: _col0 (type: char(20))
                   sort order: -
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                   Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
                   TopN Hash Memory Usage: 0.1
                   value expressions: _col1 (type: bigint), _col2 (type: bigint)
         Reducer 3 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
                outputColumnNames: _col0, _col1, _col2
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2]
                 Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 5
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                   Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
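Both char_2 plans above show the same trade-off: the ORDER BY ... LIMIT introduces a TopN, so the reduce sinks fall back to the non-native VectorReduceSinkOperator with "No TopN IS false". A sketch of the triggering shape (query verbatim from the golden file; the SET line is illustrative only):

    -- illustrative sketch, not part of the patch
    SET hive.vectorized.execution.reducesink.new.enabled=true;
    EXPLAIN VECTORIZATION EXPRESSION
    select value, sum(cast(key as int)), count(*) numrows
    from char_2 group by value order by value asc limit 5;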
diff --git ql/src/test/results/clientpositive/llap/vector_char_4.q.out ql/src/test/results/clientpositive/llap/vector_char_4.q.out
index 6d55ab0..d164ebe 100644
--- ql/src/test/results/clientpositive/llap/vector_char_4.q.out
+++ ql/src/test/results/clientpositive/llap/vector_char_4.q.out
@@ -121,12 +121,16 @@ POSTHOOK: query: create table char_lazy_binary_columnar(ct char(10), csi char(10
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@char_lazy_binary_columnar
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -143,12 +147,23 @@ STAGE PLANS:
                 TableScan
                   alias: vectortab2korc
                   Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
                   Select Operator
                     expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50))
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19]
+                        selectExpressions: CastLongToChar(col 0, maxLength 10) -> 13:Char, CastLongToChar(col 1, maxLength 10) -> 14:Char, CastLongToChar(col 2, maxLength 20) -> 15:Char, CastLongToChar(col 3, maxLength 30) -> 16:Char, VectorUDFAdaptor(CAST( f AS CHAR(20)) -> 17:char(20), VectorUDFAdaptor(CAST( d AS CHAR(20)) -> 18:char(20), CastStringGroupToChar(col 8, maxLength 50) -> 19:Char
                     Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -157,6 +172,14 @@ STAGE PLANS:
                           name: default.char_lazy_binary_columnar
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
 
   Stage: Stage-2
     Dependency Collection
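In the plan above the integer-to-char and string-to-char casts map to native CastLongToChar / CastStringGroupToChar expressions, while the float and double casts fall back to VectorUDFAdaptor. A hypothetical standalone query (not part of the test) that isolates the two paths on the same table:

    -- illustrative sketch, not part of the patch; columns t and f assumed from vectortab2korc above
    EXPLAIN VECTORIZATION EXPRESSION
    select CAST(t AS CHAR(10)), CAST(f AS CHAR(20)) from vectortab2korc;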
_col0, _col1, _col2, _col3 input vertices: 1 Map 3 @@ -166,39 +185,89 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: char(10)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is not null (type: boolean) Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: char(10)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: char(10)) sort order: + Map-reduce partition columns: _col1 (type: char(10)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: char(10)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE File 
Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -224,10 +293,14 @@ POSTHOOK: Input: default@char_join1_vc1_orc 2 abc 1 abc 2 abc 2 abc 3 abc 3 abc -PREHOOK: query: explain select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +PREHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY -POSTHOOK: query: explain select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -246,32 +319,66 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is not null (type: boolean) Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: char(10)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: char(20)) sort order: + Map-reduce partition columns: _col1 (type: char(20)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is not null (type: boolean) Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: char(20)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] 
                      Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE
                      Map Join Operator
                        condition map:
@@ -279,6 +386,10 @@ STAGE PLANS:
                        keys:
                          0 _col1 (type: char(20))
                          1 _col1 (type: char(20))
+                       Map Join Vectorization:
+                           className: VectorMapJoinInnerStringOperator
+                           native: true
+                           nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
                        outputColumnNames: _col0, _col1, _col2, _col3
                        input vertices:
                          0 Map 1
@@ -286,19 +397,46 @@ STAGE PLANS:
                        Reduce Output Operator
                          key expressions: _col0 (type: int)
                          sort order: +
+                         Reduce Sink Vectorization:
+                             className: VectorReduceSinkOperator
+                             native: false
+                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                             nativeConditionsNotMet: Uniform Hash IS false
                          Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE
                          value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: char(20))
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 3
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: char(20))
                outputColumnNames: _col0, _col1, _col2, _col3
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0, 1, 2, 3]
                Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -326,10 +464,14 @@ POSTHOOK: Input: default@char_join1_vc2_orc
 2 abc 1 abc
 2 abc 2 abc
 3 abc 3 abc
-PREHOOK: query: explain select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1
+PREHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1
+POSTHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -348,12 +490,23 @@ STAGE PLANS:
                 TableScan
                   alias: a
                   Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 1) -> boolean
                     predicate: c2 is not null (type: boolean)
                     Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: c1 (type: int), c2 (type: char(10))
                       outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1]
                      Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE
                      Map Join Operator
                        condition map:
@@ -361,6 +514,11 @@ STAGE PLANS:
                        keys:
                          0 UDFToString(_col1) (type: string)
                          1 _col1 (type: string)
+                       Map Join Vectorization:
+                           bigTableKeyExpressions: CastStringGroupToString(col 1) -> 2:String
+                           className: VectorMapJoinInnerStringOperator
+                           native: true
+                           nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
                        outputColumnNames: _col0, _col1, _col2, _col3
                        input vertices:
                          1 Map 3
@@ -368,39 +526,89 @@ STAGE PLANS:
                        Reduce Output Operator
                          key expressions: _col0 (type: int)
                          sort order: +
+                         Reduce Sink Vectorization:
+                             className: VectorReduceSinkOperator
+                             native: false
+                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                             nativeConditionsNotMet: Uniform Hash IS false
                          Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE
                          value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3
            Map Operator Tree:
                TableScan
                  alias: b
                  Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: SelectColumnIsNotNull(col 1) -> boolean
                    predicate: c2 is not null (type: boolean)
                    Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: c1 (type: int), c2 (type: string)
                      outputColumnNames: _col0, _col1
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0, 1]
                      Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col1 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col1 (type: string)
+                       Reduce Sink Vectorization:
+                           className: VectorReduceSinkStringOperator
+                           native: true
+                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                        Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: int)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: true
+               usesVectorUDFAdaptor: false
+               vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0, 1, 2, 3]
                Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_char_simple.q.out ql/src/test/results/clientpositive/llap/vector_char_simple.q.out
index b17bf57..1443bef 100644
--- ql/src/test/results/clientpositive/llap/vector_char_simple.q.out
+++ ql/src/test/results/clientpositive/llap/vector_char_simple.q.out
@@ -45,16 +45,20 @@ POSTHOOK: Input: default@src
 0 val_0
 10 val_10
 100 val_100
-PREHOOK: query: explain select key, value
+PREHOOK: query: explain vectorization only select key, value
 from char_2
 order by key asc
 limit 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select key, value
+POSTHOOK: query: explain vectorization only select key, value
 from char_2
 order by key asc
 limit 5
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -62,51 +66,32 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
       Vertices:
         Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: char_2
-                  Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: key (type: char(10)), value (type: char(20))
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: char(10))
-                      sort order: +
-                      Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
-                      TopN Hash Memory Usage: 0.1
-                      value expressions: _col1 (type: char(20))
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
-           Reduce Operator Tree:
-             Select Operator
-               expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20))
-               outputColumnNames: _col0, _col1
-               Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
-               Limit
-                 Number of rows: 5
-                 Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
-                 File Output Operator
-                   compressed: false
-                   Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
-                   table:
-                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
   Stage: Stage-0
     Fetch Operator
-      limit: 5
-      Processor Tree:
-        ListSink
 PREHOOK: query: select key, value
 from char_2
@@ -146,16 +131,20 @@ POSTHOOK: Input: default@src
 97 val_97
 97 val_97
 96 val_96
-PREHOOK: query: explain select key, value
+PREHOOK: query: explain vectorization only select key, value
 from char_2
 order by key desc
 limit 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select key, value
+POSTHOOK: query: explain vectorization only select key, value
 from char_2
 order by key desc
 limit 5
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -163,51 +152,32 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
       Vertices:
         Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: char_2
-                  Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: key (type: char(10)), value (type: char(20))
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: char(10))
-                      sort order: -
-                      Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
-                      TopN Hash Memory Usage: 0.1
-                      value expressions: _col1 (type: char(20))
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
-           Reduce Operator Tree:
-             Select Operator
-               expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20))
-               outputColumnNames: _col0, _col1
-               Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
-               Limit
-                 Number of rows: 5
-                 Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
-                 File Output Operator
-                   compressed: false
-                   Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
-                   table:
-                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
   Stage: Stage-0
     Fetch Operator
-      limit: 5
-      Processor Tree:
-        ListSink
 PREHOOK: query: select key, value
 from char_2
@@ -248,12 +218,16 @@ POSTHOOK: query: create table char_3 (
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@char_3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization only operator
 insert into table char_3 select cint from alltypesorc limit 10
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization only operator
 insert into table char_3 select cint from alltypesorc limit 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -263,68 +237,63 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
-#### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
-                TableScan
-                  alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: cint (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
-                    Limit
-                      Number of rows: 10
-                      Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        sort order:
-                        Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
-                        TopN Hash Memory Usage: 0.1
-                        value expressions: _col0 (type: int)
+                TableScan Vectorization:
+                    native: true
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                    Limit Vectorization:
+                        className: VectorLimitOperator
+                        native: true
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
-             Select Operator
-               expressions: VALUE._col0 (type: int)
-               outputColumnNames: _col0
-               Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
-               Limit
-                 Number of rows: 10
-                 Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
-                 Select Operator
-                   expressions: CAST( _col0 AS CHAR(12) (type: char(12))
-                   outputColumnNames: _col0
-                   Statistics: Num rows: 10 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE
-                   File Output Operator
-                     compressed: false
-                     Statistics: Num rows: 10 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE
-                     table:
-                         input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                         output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
-                         serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-                         name: default.char_3
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+               Limit Vectorization:
+                   className: VectorLimitOperator
+                   native: true
+                 Select Vectorization:
+                     className: VectorSelectOperator
+                     native: true
+                     selectExpressions: CastLongToChar(col 0, maxLength 12) -> 1:Char
+                   File Sink Vectorization:
+                       className: VectorFileSinkOperator
+                       native: false
  Stage: Stage-2
-    Dependency Collection
  Stage: Stage-0
-    Move Operator
-      tables:
-          replace: false
-          table:
-              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
-              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-              name: default.char_3
  Stage: Stage-3
-    Stats-Aggr Operator
 PREHOOK: query: insert into table char_3 select cint from alltypesorc limit 10
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/llap/vector_coalesce.q.out ql/src/test/results/clientpositive/llap/vector_coalesce.q.out
index f4634d3..944d38d 100644
--- ql/src/test/results/clientpositive/llap/vector_coalesce.q.out
+++ ql/src/test/results/clientpositive/llap/vector_coalesce.q.out
@@ -1,15 +1,19 @@
-PREHOOK: query: EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c
+PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c
 FROM alltypesorc
 WHERE (cdouble IS NULL)
 ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c
 LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c
+POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c
 FROM alltypesorc
 WHERE (cdouble IS NULL)
 ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c
 LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -17,53 +21,62 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
-                TableScan
-                  alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 1045942 Basic stats: COMPLETE Column stats: COMPLETE
-                  Filter Operator
-                    predicate: cdouble is null (type: boolean)
-                    Statistics: Num rows: 3114 Data size: 265164 Basic stats: COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: cstring1 (type: string), cint (type: int), cfloat (type: float), csmallint (type: smallint), COALESCE(null,cstring1,cint,cfloat,csmallint) (type: string)
-                      outputColumnNames: _col1, _col2, _col3, _col4, _col5
-                      Statistics: Num rows: 3114 Data size: 819540 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: smallint), _col5 (type: string)
-                        sort order: +++++
-                        Statistics: Num rows: 3114 Data size: 819540 Basic stats: COMPLETE Column stats: COMPLETE
-                        TopN Hash Memory Usage: 0.1
+                TableScan Vectorization:
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      predicateExpression: SelectColumnIsNull(col 5) -> boolean
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [6, 2, 4, 1, 16]
+                        selectExpressions: VectorCoalesce(columns [12, 6, 13, 14, 15])(children: ConstantVectorExpression(val null) -> 12:string, col 6, CastLongToString(col 2) -> 13:String, VectorUDFAdaptor(null(cfloat)) -> 14:string, CastLongToString(col 1) -> 15:String) -> 16:string
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: true
+               vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
-             Select Operator
-               expressions: null (type: double), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: float), KEY.reducesinkkey3 (type: smallint), KEY.reducesinkkey4 (type: string)
-               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-               Statistics: Num rows: 3114 Data size: 246572 Basic stats: COMPLETE Column stats: COMPLETE
-               Limit
-                 Number of rows: 10
-                 Statistics: Num rows: 10 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
-                 File Output Operator
-                   compressed: false
-                   Statistics: Num rows: 10 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
-                   table:
-                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [5, 0, 1, 2, 3, 4]
+                 selectExpressions: ConstantVectorExpression(val null) -> 5:double
+               Limit Vectorization:
+                   className: VectorLimitOperator
+                   native: true
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
  Stage: Stage-0
    Fetch Operator
-      limit: 10
-      Processor Tree:
-        ListSink
 PREHOOK: query: SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c
 FROM alltypesorc
@@ -91,18 +104,22 @@ NULL NULL -738306196 -51.0 NULL -738306196
 NULL NULL -819152895 8.0 NULL -819152895
 NULL NULL -827212561 8.0 NULL -827212561
 NULL NULL -949587513 11.0 NULL -949587513
-PREHOOK: query: EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c
+PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c
 FROM alltypesorc
 WHERE (ctinyint IS NULL)
 ORDER BY ctinyint, cdouble, cint, c
 LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c
+POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c
 FROM alltypesorc
 WHERE (ctinyint IS NULL)
 ORDER BY ctinyint, cdouble, cint, c
 LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -110,53 +127,62 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
-                TableScan
-                  alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 146792 Basic stats: COMPLETE Column stats: COMPLETE
-                  Filter Operator
-                    predicate: ctinyint is null (type: boolean)
-                    Statistics: Num rows: 3115 Data size: 37224 Basic stats: COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: cdouble (type: double), cint (type: int), COALESCE(null,(cdouble + log2(cint)),0) (type: double)
-                      outputColumnNames: _col1, _col2, _col3
-                      Statistics: Num rows: 3115 Data size: 52844 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col1 (type: double), _col2 (type: int), _col3 (type: double)
-                        sort order: +++
-                        Statistics: Num rows: 3115 Data size: 52844 Basic stats: COMPLETE Column stats: COMPLETE
-                        TopN Hash Memory Usage: 0.1
+                TableScan Vectorization:
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      predicateExpression: SelectColumnIsNull(col 0) -> boolean
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [5, 2, 15]
+                        selectExpressions: VectorCoalesce(columns [12, 14, 13])(children: ConstantVectorExpression(val null) -> 12:double, DoubleColAddDoubleColumn(col 5, col 13)(children: FuncLog2LongToDouble(col 2) -> 13:double) -> 14:double, ConstantVectorExpression(val 0.0) -> 13:double) -> 15:double
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+ vectorized: true Reduce Operator Tree: - Select Operator - expressions: null (type: tinyint), KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3115 Data size: 27928 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 0, 1, 2] + selectExpressions: ConstantVectorExpression(val null) -> 3:tinyint + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c FROM alltypesorc @@ -184,18 +210,22 @@ NULL NULL -850295959 0.0 NULL NULL -886426182 0.0 NULL NULL -899422227 0.0 NULL NULL -971543377 0.0 -PREHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -203,50 +233,61 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 110088 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (cfloat is null and cbigint is null) (type: boolean) - Statistics: Num rows: 790 Data size: 7092 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 4) -> boolean, SelectColumnIsNull(col 3) -> boolean) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: null (type: float), null (type: bigint), 0.0 (type: float) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + selectExpressions: ConstantVectorExpression(val null) -> 0:float, ConstantVectorExpression(val null) -> 1:bigint, ConstantVectorExpression(val 0.0) -> 2:double + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc @@ -274,18 +315,22 @@ NULL NULL 0.0 NULL NULL 0.0 NULL NULL 0.0 NULL NULL 0.0 -PREHOOK: query: EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL ORDER BY ctimestamp1, ctimestamp2, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL ORDER BY ctimestamp1, ctimestamp2, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -293,53 +338,61 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 
Data size: 983040 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (ctimestamp1 is not null or ctimestamp2 is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 983040 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), COALESCE(ctimestamp1,ctimestamp2) (type: timestamp) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: timestamp) - sort order: +++ - Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 9) -> boolean) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: VectorCoalesce(columns [8, 9])(children: col 8, col 9) -> 12:timestamp + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: timestamp), KEY.reducesinkkey2 (type: timestamp) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, 
ctimestamp2) as c FROM alltypesorc @@ -367,18 +420,22 @@ NULL 1969-12-31 15:59:43.684 1969-12-31 15:59:43.684 NULL 1969-12-31 15:59:43.703 1969-12-31 15:59:43.703 NULL 1969-12-31 15:59:43.704 1969-12-31 15:59:43.704 NULL 1969-12-31 15:59:43.709 1969-12-31 15:59:43.709 -PREHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -386,50 +443,61 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 110088 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (cfloat is null and cbigint is null) (type: boolean) - Statistics: Num rows: 790 Data size: 7092 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 790 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 790 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 4) -> boolean, SelectColumnIsNull(col 3) -> boolean) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: null (type: float), null (type: bigint), null (type: float) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 790 Data size: 16 
Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + selectExpressions: ConstantVectorExpression(val null) -> 0:float, ConstantVectorExpression(val null) -> 1:bigint, ConstantVectorExpression(val null) -> 2:float + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc @@ -457,34 +525,61 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c FROM alltypesorc WHERE cbigint IS NULL LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c FROM alltypesorc WHERE cbigint IS NULL LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNull(col 3) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 0, 14] + selectExpressions: ConstantVectorExpression(val null) -> 12:bigint, VectorCoalesce(columns [13, 0])(children: ConstantVectorExpression(val null) -> 13:tinyint, col 0) -> 14:tinyint + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: cbigint is null (type: boolean) - Select Operator - expressions: null (type: bigint), ctinyint (type: tinyint), COALESCE(null,ctinyint) (type: tinyint) - outputColumnNames: _col0, _col1, _col2 - Limit - Number of rows: 10 - ListSink PREHOOK: query: SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c FROM alltypesorc diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out 
ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out index 18f45ff..6abf92d 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out @@ -14,18 +14,22 @@ POSTHOOK: type: QUERY POSTHOOK: Output: default@str_str_orc POSTHOOK: Lineage: str_str_orc.str1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: str_str_orc.str2 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc GROUP BY str2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc GROUP BY str2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -49,6 +53,10 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -66,6 +74,10 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 @@ -104,14 +116,18 @@ POSTHOOK: Input: default@str_str_orc #### A masked pattern was here #### X 0.02 y 0.0 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COALESCE(str1, 0) as result from str_str_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COALESCE(str1, 0) as result from str_str_orc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -160,18 +176,22 @@ POSTHOOK: Input: default@str_str_orc 0 1 0 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc GROUP BY str2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc GROUP BY str2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -189,12 +209,27 @@ STAGE PLANS: TableScan alias: str_str_orc Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 4] + selectExpressions: 
VectorUDFAdaptor(UDFToInteger(COALESCE(str1,0)))(children: VectorCoalesce(columns [0, 2])(children: col 0, ConstantVectorExpression(val 0) -> 2:string) -> 3:string) -> 4:int Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 4) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1 + native: false + projectedOutputColumns: [0] keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -203,15 +238,41 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 @@ -219,9 +280,17 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), round((UDFToDouble(_col1) / 60.0), 2) (type: double) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 3, decimalPlaces 2)(children: DoubleColDivideDoubleScalar(col 2, val 60.0)(children: CastLongToDouble(col 1) -> 2:double) -> 3:double) -> 2:double Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -250,14 +319,18 @@ POSTHOOK: Input: default@str_str_orc #### A masked pattern was here #### X 0.02 y 0.0 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COALESCE(str1, 0) as result from str_str_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COALESCE(str1, 0) as result from str_str_orc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -272,12 +345,23 @@ STAGE PLANS: TableScan alias: str_str_orc Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: COALESCE(str1,0) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + selectExpressions: VectorCoalesce(columns [0, 2])(children: col 0, ConstantVectorExpression(val 0) -> 2:string) -> 3:string Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -285,6 +369,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_complex_join.q.out ql/src/test/results/clientpositive/llap/vector_complex_join.q.out index 89507ee..5ea4b0f 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_join.q.out @@ -17,13 +17,17 @@ POSTHOOK: Output: default@test POSTHOOK: Lineage: test.a SIMPLE [] POSTHOOK: Lineage: test.b EXPRESSION [] _c0 _c1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from alltypesorc join test where alltypesorc.cint=test.a PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from alltypesorc join test where alltypesorc.cint=test.a POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -67,6 +71,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Small Table expression for MAPJOIN operator: Data type map of Column[_col1] not supported + vectorized: false Map 2 Map Operator Tree: TableScan @@ -87,6 +97,12 @@ STAGE PLANS: value expressions: _col1 (type: map) Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type map of Column[b] not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -140,13 +156,17 @@ POSTHOOK: type: QUERY POSTHOOK: Output: default@test2b POSTHOOK: Lineage: test2b.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ] _col0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from test2b join test2a on test2b.a = test2a.a[1] PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from test2b join test2a on test2b.a = test2a.a[1] POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -190,6 +210,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Small Table expression for MAPJOIN operator: Data type array of Column[a] not supported + vectorized: false Map 2 Map Operator Tree: TableScan @@ -206,6 +232,12 @@ STAGE PLANS: value expressions: a (type: array) Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: Data type array of Column[a] not supported + vectorized: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_const.q.out ql/src/test/results/clientpositive/llap/vector_const.q.out new file mode 100644 index 0000000..f7b958e --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_const.q.out @@ -0,0 +1,64 @@ +PREHOOK: query: CREATE TEMPORARY TABLE varchar_const_1 (c1 int) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@varchar_const_1 +POSTHOOK: query: CREATE TEMPORARY TABLE varchar_const_1 (c1 int) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@varchar_const_1 +PREHOOK: query: INSERT INTO varchar_const_1 values(42) +PREHOOK: type: QUERY +PREHOOK: Output: default@varchar_const_1 +POSTHOOK: query: INSERT INTO varchar_const_1 values(42) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@varchar_const_1 +POSTHOOK: Lineage: varchar_const_1.c1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: EXPLAIN +SELECT CONCAT(CAST('F' AS CHAR(2)), CAST('F' AS VARCHAR(2))) FROM VARCHAR_CONST_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CONCAT(CAST('F' AS CHAR(2)), CAST('F' AS VARCHAR(2))) FROM VARCHAR_CONST_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: varchar_const_1 + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'FF' (type: varchar(4)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CONCAT(CAST('F' AS CHAR(2)), CAST('F' AS VARCHAR(2))) FROM VARCHAR_CONST_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_const_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CONCAT(CAST('F' AS CHAR(2)), CAST('F' AS VARCHAR(2))) FROM VARCHAR_CONST_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_const_1 +#### A masked pattern was here #### +FF diff --git ql/src/test/results/clientpositive/llap/vector_count.q.out ql/src/test/results/clientpositive/llap/vector_count.q.out index 9ef5c2b..ff742b9 100644 --- ql/src/test/results/clientpositive/llap/vector_count.q.out +++ ql/src/test/results/clientpositive/llap/vector_count.q.out @@ -43,10 +43,14 @@ POSTHOOK: Input: default@abcd 12 100 75 7 12 NULL 80 2 NULL 35 23 6 -PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +PREHOOK: query: explain vectorization expression select a, count(distinct b), count(distinct c), sum(d) from abcd group by a PREHOOK: type: QUERY -POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: query: explain vectorization expression select a, count(distinct b), count(distinct c), sum(d) from abcd group by a POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -64,12 +68,26 @@ STAGE PLANS: TableScan alias: abcd Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: a (type: int), b (type: int), c (type: int), d (type: int) outputColumnNames: a, b, c, d + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT b), count(DISTINCT c), sum(d) + Group By Vectorization: + aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFSumLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1, col 2 + native: false + projectedOutputColumns: [0, 1, 2] keys: a (type: int), b (type: int), c (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -78,12 +96,30 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false, No DISTINCT columns IS false Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS 
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: GROUPBY operator: DISTINCT not supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col2)
@@ -117,10 +153,14 @@ POSTHOOK: Input: default@abcd
 100 1 1 3
 12 1 2 9
 NULL 1 1 6
-PREHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+PREHOOK: query: explain vectorization expression select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+POSTHOOK: query: explain vectorization expression select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -138,12 +178,26 @@ STAGE PLANS:
                 TableScan
                   alias: abcd
                   Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                   Select Operator
                     expressions: a (type: int), b (type: int), c (type: int), d (type: int)
                     outputColumnNames: _col1, _col2, _col3, _col4
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3]
                     Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count(1), count(), count(_col1), count(_col2), count(_col3), count(_col4), count(DISTINCT _col1), count(DISTINCT _col2), count(DISTINCT _col3), count(DISTINCT _col4), count(DISTINCT _col1, _col2), count(DISTINCT _col2, _col3), count(DISTINCT _col3, _col4), count(DISTINCT _col1, _col4), count(DISTINCT _col1, _col3), count(DISTINCT _col2, _col4), count(DISTINCT _col1, _col2, _col3), count(DISTINCT _col2, _col3, _col4), count(DISTINCT _col1, _col3, _col4), count(DISTINCT _col1, _col2, _col4), count(DISTINCT _col1, _col2, _col3, _col4)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 4:long) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 0, col 1, col 2, col 3
+                          native: false
+                          projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
                       keys: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
@@ -151,12 +205,30 @@
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
                         sort order: ++++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false, No DISTINCT columns IS false
                         Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: GROUPBY operator: DISTINCT not supported
+                vectorized: false
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3)
@@ -186,10 +258,14 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@abcd
 #### A masked pattern was here ####
 7 7 6 6 6 7 3 3 6 7 4 5 6 6 5 6 4 5 5 5 4
-PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a
+PREHOOK: query: explain vectorization expression select a, count(distinct b), count(distinct c), sum(d) from abcd group by a
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a
+POSTHOOK: query: explain vectorization expression select a, count(distinct b), count(distinct c), sum(d) from abcd group by a
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -207,20 +283,45 @@ STAGE PLANS:
                 TableScan
                   alias: abcd
                   Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                   Select Operator
                     expressions: a (type: int), b (type: int), c (type: int), d (type: int)
                     outputColumnNames: a, b, c, d
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3]
                     Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: a (type: int), b (type: int), c (type: int)
                       sort order: +++
                       Map-reduce partition columns: a (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false, No DISTINCT columns IS false
                       Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                       value expressions: d (type: int)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: GROUPBY operator: DISTINCT not supported
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col0)
@@ -254,10 +355,14 @@ POSTHOOK: Input: default@abcd
 100 1 1 3
 12 1 2 9
 NULL 1 1 6
-PREHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+PREHOOK: query: explain vectorization expression select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+POSTHOOK: query: explain vectorization expression select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -275,18 +380,43 @@ STAGE PLANS:
                 TableScan
                   alias: abcd
                   Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                   Select Operator
                     expressions: a (type: int), b (type: int), c (type: int), d (type: int)
                     outputColumnNames: _col1, _col2, _col3, _col4
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3]
                     Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
                       sort order: ++++
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false, No DISTINCT columns IS false
                       Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: GROUPBY operator: DISTINCT not supported
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(1), count(), count(KEY._col0:0._col0), count(KEY._col0:1._col0), count(KEY._col0:2._col0), count(KEY._col0:3._col0), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3)
diff --git ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
index 3456d45..d3eb27f 100644
--- ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
+++ ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
@@ -1225,12 +1225,16 @@ POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_sold_time_sk SIMPLE
 POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_warehouse_sk SIMPLE [(web_sales_txt)web_sales_txt.FieldSchema(name:ws_warehouse_sk, type:int, comment:null), ]
 POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_web_page_sk SIMPLE [(web_sales_txt)web_sales_txt.FieldSchema(name:ws_web_page_sk, type:int, comment:null), ]
 POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_wholesale_cost SIMPLE [(web_sales_txt)web_sales_txt.FieldSchema(name:ws_wholesale_cost, type:decimal(7,2), comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select count(distinct ws_order_number) from web_sales
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select count(distinct ws_order_number) from web_sales
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1249,11 +1253,24 @@
                 TableScan
                   alias: web_sales
                   Statistics: Num rows: 2000 Data size: 3520000 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
                   Select Operator
                     expressions: ws_order_number (type: int)
                     outputColumnNames: ws_order_number
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [16]
                     Statistics: Num rows: 2000 Data size: 3520000 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 16
+                          native: false
+                          projectedOutputColumns: []
                       keys: ws_order_number (type: int)
                       mode: hash
                       outputColumnNames: _col0
@@ -1262,36 +1279,88 @@
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 2000 Data size: 3520000 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: []
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1000 Data size: 1760000 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  aggregations: count(_col0)
+                  Group By Vectorization:
+                      aggregators: VectorUDAFCount(col 0) -> bigint
+                      className: VectorGroupByOperator
+                      vectorOutput: true
+                      native: false
+                      projectedOutputColumns: [0]
                  mode: hash
                  outputColumnNames: _col0
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    sort order: 
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: Uniform Hash IS false
                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col0 (type: bigint)
        Reducer 3 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_data_types.q.out ql/src/test/results/clientpositive/llap/vector_data_types.q.out
index 045f536..ec6c4c8 100644
--- ql/src/test/results/clientpositive/llap/vector_data_types.q.out
+++ ql/src/test/results/clientpositive/llap/vector_data_types.q.out
@@ -95,10 +95,14 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s
 POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ]
 POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ]
-PREHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -191,10 +195,14 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1korc
 #### A masked pattern was here ####
 -17045922556
-PREHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -212,30 +220,67 @@ STAGE PLANS:
                 TableScan
                   alias: over1korc
                   Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                   Select Operator
                     expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                     Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int)
                      sort order: +++
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                      Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
                      TopN Hash Memory Usage: 0.1
                      value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: boolean), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: decimal(4,2)), VALUE._col7 (type: binary)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
                Limit
                  Number of rows: 20
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                  Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                    Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out
index a8d1e05..2f44d56 100644
--- ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out
+++ ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out
@@ -20,20 +20,24 @@ POSTHOOK: Lineage: decimal_vgby.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.F
 POSTHOOK: Lineage: decimal_vgby.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
 POSTHOOK: Lineage: decimal_vgby.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
 POSTHOOK: Lineage: decimal_vgby.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
-PREHOOK: query: EXPLAIN SELECT cint,
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint,
 COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1),
 COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2)
 FROM decimal_vgby
 GROUP BY cint
 HAVING COUNT(*) > 1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cint,
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint,
 COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1),
 COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2)
 FROM decimal_vgby
 GROUP BY cint
 HAVING COUNT(*) > 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -51,12 +55,26 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_vgby
                   Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                   Select Operator
                     expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
                     outputColumnNames: cint, cdecimal1, cdecimal2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3, 1, 2]
                     Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 3
+                          native: false
+                          projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
                       keys: cint (type: int)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
@@ -65,28 +83,65 @@
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)), _col9 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), max(VALUE._col5), min(VALUE._col6), sum(VALUE._col7), count(VALUE._col8)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 1) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(20,10), VectorUDAFMinDecimal(col 3) -> decimal(20,10), VectorUDAFSumDecimal(col 4) -> decimal(38,18), VectorUDAFCountMerge(col 5) -> bigint, VectorUDAFMaxDecimal(col 6) -> decimal(23,14), VectorUDAFMinDecimal(col 7) -> decimal(23,14), VectorUDAFSumDecimal(col 8) -> decimal(38,18), VectorUDAFCountMerge(col 9) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
                Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
                Filter Operator
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      predicateExpression: FilterLongColGreaterLongScalar(col 9, val 1) -> boolean
                  predicate: (_col9 > 1) (type: boolean)
                  Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14))
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
                    Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE
                    File Output Operator
                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                      Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -125,20 +180,24 @@ POSTHOOK: Input: default@decimal_vgby
 6981 3 5831542.2692483780 -515.6210729730 5830511.0271024320 3 6984454.21109769200000 -617.56077692307690 6983219.08954384584620
 762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2 6984454.21109769200000 1833.94569230769250 6986288.15678999969250
 NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360
-PREHOOK: query: EXPLAIN SELECT cint,
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint,
 COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1),
 COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2)
 FROM decimal_vgby
 GROUP BY cint
 HAVING COUNT(*) > 1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cint,
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint,
 COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1),
 COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2)
 FROM decimal_vgby
 GROUP BY cint
 HAVING COUNT(*) > 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -156,12 +215,27 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_vgby
                   Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                   Select Operator
                     expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
                     outputColumnNames: cint, cdecimal1, cdecimal2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3, 1, 2]
                     Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFAvgDecimal(col 1) -> struct, VectorUDAFStdPopDecimal(col 1) -> struct, VectorUDAFStdSampDecimal(col 1) -> struct, VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFAvgDecimal(col 2) -> struct, VectorUDAFStdPopDecimal(col 2) -> struct, VectorUDAFStdSampDecimal(col 2) -> struct, VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: false
+                          keyExpressions: col 3
+                          native: false
+                          projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
+                          vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false
                       keys: cint (type: int)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
@@ -174,8 +248,21 @@
                         value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col4] not supported
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14)
diff --git ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out
index 16d9929..c45210e 100644
--- ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out
+++ ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out
@@ -1,25 +1,76 @@
-PREHOOK: query: EXPLAIN SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 638316 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 5) -> boolean, SelectColumnIsNotNull(col 2) -> boolean, SelectColumnIsNotNull(col 10) -> boolean, SelectColumnIsNotNull(col 8) -> boolean) -> boolean
+                    predicate: (cdouble is not null and cint is not null and cboolean1 is not null and ctimestamp1 is not null) (type: boolean)
+                    Statistics: Num rows: 5112 Data size: 265564 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: cdouble (type: double), cint (type: int), cboolean1 (type: boolean), ctimestamp1 (type: timestamp), CAST( cdouble AS decimal(20,10)) (type: decimal(20,10)), CAST( cint AS decimal(23,14)) (type: decimal(23,14)), CAST( cboolean1 AS decimal(5,2)) (type: decimal(5,2)), CAST( ctimestamp1 AS decimal(15,0)) (type: decimal(15,0))
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [5, 2, 10, 8, 12, 13, 14, 15]
+                          selectExpressions: CastDoubleToDecimal(col 5) -> 12:decimal(20,10), CastLongToDecimal(col 2) -> 13:decimal(23,14), CastLongToDecimal(col 10) -> 14:decimal(5,2), CastTimestampToDecimal(col 8) -> 15:decimal(15,0)
+                      Statistics: Num rows: 5112 Data size: 2410700 Basic stats: COMPLETE Column stats: COMPLETE
+                      Limit
+                        Number of rows: 10
+                        Limit Vectorization:
+                            className: VectorLimitOperator
+                            native: true
+                        Statistics: Num rows: 10 Data size: 4784 Basic stats: COMPLETE Column stats: COMPLETE
+                        File Output Operator
+                          compressed: false
+                          File Sink Vectorization:
+                              className: VectorFileSinkOperator
+                              native: false
+                          Statistics: Num rows: 10 Data size: 4784 Basic stats: COMPLETE Column stats: COMPLETE
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
   Stage: Stage-0
     Fetch Operator
       limit: 10
       Processor Tree:
-        TableScan
-          alias: alltypesorc
-          Filter Operator
-            predicate: (cdouble is not null and cint is not null and cboolean1 is not null and ctimestamp1 is not null) (type: boolean)
-            Select Operator
-              expressions: cdouble (type: double), cint (type: int), cboolean1 (type: boolean), ctimestamp1 (type: timestamp), CAST( cdouble AS decimal(20,10)) (type: decimal(20,10)), CAST( cint AS decimal(23,14)) (type: decimal(23,14)), CAST( cboolean1 AS decimal(5,2)) (type: decimal(5,2)), CAST( ctimestamp1 AS decimal(15,0)) (type: decimal(15,0))
-              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-              Limit
-                Number of rows: 10
-                ListSink
+        ListSink
 
 PREHOOK: query: SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out
index 67b58c7..dcc2d13 100644
--- ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out
+++ ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out
@@ -11,14 +11,18 @@ POSTHOOK: Output: default@decimal_test
 POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
 POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
 POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
-PREHOOK: query: EXPLAIN SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL
 ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14
 LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL
 ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14
 LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -36,32 +40,74 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_test
                   Statistics: Num rows: 12288 Data size: 2128368 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterDecimalColGreaterDecimalScalar(col 1, val 0) -> boolean, FilterDecimalColLessDecimalScalar(col 1, val 12345.5678) -> boolean, FilterDecimalColNotEqualDecimalScalar(col 2, val 0) -> boolean, FilterDecimalColGreaterDecimalScalar(col 2, val 1000) -> boolean, SelectColumnIsNotNull(col 0) -> boolean) -> boolean
                    predicate: ((cdecimal1 > 0) and (cdecimal1 < 12345.5678) and (cdecimal2 <> 0) and (cdecimal2 > 1000) and cdouble is not null) (type: boolean)
                    Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: (cdecimal1 + cdecimal2) (type: decimal(25,14)), (cdecimal1 - (2 * cdecimal2)) (type: decimal(26,14)), ((cdecimal1 + 2.34) / cdecimal2) (type: decimal(38,13)), (cdecimal1 * (cdecimal2 / 3.4)) (type: decimal(38,17)), (cdecimal1 % 10) (type: decimal(12,10)), UDFToInteger(cdecimal1) (type: int), UDFToShort(cdecimal2) (type: smallint), UDFToByte(cdecimal2) (type: tinyint), UDFToLong(cdecimal1) (type: bigint), UDFToBoolean(cdecimal1) (type: boolean), UDFToDouble(cdecimal2) (type: double), UDFToFloat(cdecimal1) (type: float), UDFToString(cdecimal2) (type: string), CAST( cdecimal1 AS TIMESTAMP) (type: timestamp)
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [3, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+                          selectExpressions: DecimalColAddDecimalColumn(col 1, col 2) -> 3:decimal(25,14), DecimalColSubtractDecimalColumn(col 1, col 4)(children: DecimalScalarMultiplyDecimalColumn(val 2, col 2) -> 4:decimal(25,14)) -> 5:decimal(26,14), DecimalColDivideDecimalColumn(col 6, col 2)(children: DecimalColAddDecimalScalar(col 1, val 2.34) -> 6:decimal(21,10)) -> 7:decimal(38,13), DecimalColMultiplyDecimalColumn(col 1, col 8)(children: DecimalColDivideDecimalScalar(col 2, val 3.4) -> 8:decimal(27,17)) -> 9:decimal(38,17), DecimalColModuloDecimalScalar(col 1, val 10) -> 10:decimal(12,10), CastDecimalToLong(col 1) -> 11:int, CastDecimalToLong(col 2) -> 12:smallint, CastDecimalToLong(col 2) -> 13:tinyint, CastDecimalToLong(col 1) -> 14:bigint, CastDecimalToBoolean(col 1) -> 15:Boolean, CastDecimalToDouble(col 2) -> 16:double, CastDecimalToDouble(col 1) -> 17:double, CastDecimalToString(col 2) -> 18:String, CastDecimalToTimestamp(col 1) -> 19:timestamp
                      Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: decimal(25,14)), _col1 (type: decimal(26,14)), _col2 (type: decimal(38,13)), _col3 (type: decimal(38,17)), _col4 (type: decimal(12,10)), _col5 (type: int), _col6 (type: smallint), _col7 (type: tinyint), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: double), _col11 (type: float), _col12 (type: string), _col13 (type: timestamp)
                        sort order: ++++++++++++++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                        Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE
                        TopN Hash Memory Usage: 0.1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: decimal(25,14)), KEY.reducesinkkey1 (type: decimal(26,14)), KEY.reducesinkkey2 (type: decimal(38,13)), KEY.reducesinkkey3 (type: decimal(38,17)), KEY.reducesinkkey4 (type: decimal(12,10)), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: smallint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey9 (type: boolean), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: string), KEY.reducesinkkey13 (type: timestamp)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
                Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE
                Limit
                  Number of rows: 10
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                  Statistics: Num rows: 10 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                    Statistics: Num rows: 10 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out
index eddb4dc..29e779d 100644
--- ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out
+++ ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out
@@ -72,12 +72,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k
 POSTHOOK: Output: default@t2
 POSTHOOK: Lineage: t2.dec EXPRESSION [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -95,12 +99,23 @@ STAGE PLANS:
                 TableScan
                   alias: t1
                   Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 1)(children: CastDecimalToBoolean(col 0) -> 1:Boolean) -> boolean
                    predicate: dec is not null (type: boolean)
                    Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: dec (type: decimal(4,2))
                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0]
                      Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
                      Map Join Operator
                        condition map:
@@ -108,12 +123,21 @@
                        keys:
                          0 _col0 (type: decimal(6,2))
                          1 _col0 (type: decimal(6,2))
+                        Map Join Vectorization:
+                            className: VectorMapJoinOperator
+                            native: false
+                            nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true
+                            nativeConditionsNotMet: Optimized Table and Supports Key Types IS false
+                            nativeNotSupportedKeyTypes: DECIMAL
                        outputColumnNames: _col0, _col1
                        input vertices:
                          1 Map 2
                        Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE
                        File Output Operator
                          compressed: false
+                          File Sink Vectorization:
+                              className: VectorFileSinkOperator
+                              native: false
                          Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE
                          table:
                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -121,25 +145,56 @@
                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 2 
            Map Operator Tree:
                TableScan
                  alias: t2
                  Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 1)(children: CastDecimalToBoolean(col 0) -> 1:Boolean) -> boolean
                    predicate: dec is not null (type: boolean)
                    Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: dec (type: decimal(4,0))
                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0]
                      Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: decimal(6,2))
                        sort order: +
                        Map-reduce partition columns: _col0 (type: decimal(6,2))
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                        Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
diff --git ql/src/test/results/clientpositive/llap/vector_decimal_math_funcs.q.out ql/src/test/results/clientpositive/llap/vector_decimal_math_funcs.q.out
index ec9caf9..e0c680e 100644
--- ql/src/test/results/clientpositive/llap/vector_decimal_math_funcs.q.out
+++ ql/src/test/results/clientpositive/llap/vector_decimal_math_funcs.q.out
@@ -12,7 +12,7 @@ POSTHOOK: Lineage: decimal_test.cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSc
 POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
 POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
 POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
    cdecimal1
   ,Round(cdecimal1, 2)
@@ -49,7 +49,7 @@ where cbigint % 500 = 0
 and sin(cdecimal1) >= -1.0
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
@@ -86,22 +86,69 @@ where cbigint % 500 = 0
 and sin(cdecimal1) >= -1.0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: decimal_test
+                  Statistics: Num rows: 12288 Data size: 2201752 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 4, val 0)(children: LongColModuloLongScalar(col 0, val 500) -> 4:long) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 6, val -1.0)(children: FuncSinDoubleToDouble(col 5)(children: CastDecimalToDouble(col 2) -> 5:double) -> 6:double) -> boolean) -> boolean
+                    predicate: (((cbigint % 500) = 0) and (sin(cdecimal1) >= -1.0)) (type: boolean)
+                    Statistics: Num rows: 2048 Data size: 366958 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: cdecimal1 (type: decimal(20,10)), round(cdecimal1, 2) (type: decimal(13,2)), round(cdecimal1) (type: decimal(11,0)), floor(cdecimal1) (type: decimal(11,0)), ceil(cdecimal1) (type: decimal(11,0)), round(exp(cdecimal1), 58) (type: double), ln(cdecimal1) (type: double), log10(cdecimal1) (type: double), log2(cdecimal1) (type: double), log2((cdecimal1 - 15601)) (type: double), log(2, cdecimal1) (type: double), power(log2(cdecimal1), 2) (type: double), power(log2(cdecimal1), 2) (type: double), sqrt(cdecimal1) (type: double), abs(cdecimal1) (type: decimal(20,10)), sin(cdecimal1) (type: double), asin(cdecimal1) (type: double), cos(cdecimal1) (type: double), acos(cdecimal1) (type: double), atan(cdecimal1) (type: double), degrees(cdecimal1) (type: double), radians(cdecimal1) (type: double), cdecimal1 (type: decimal(20,10)), (- cdecimal1) (type: decimal(20,10)), sign(cdecimal1) (type: int), cos(((- sin(log(cdecimal1))) + 3.14159)) (type: double)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [2, 7, 8, 9, 10, 5, 11, 12, 13, 15, 6, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 2, 28, 4, 29]
+                          selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 2, decimalPlaces 2) -> 7:decimal(13,2), FuncRoundDecimalToDecimal(col 2) -> 8:decimal(11,0), FuncFloorDecimalToDecimal(col 2) -> 9:decimal(11,0), FuncCeilDecimalToDecimal(col 2) -> 10:decimal(11,0), RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 58)(children: FuncExpDoubleToDouble(col 5)(children: CastDecimalToDouble(col 2) -> 5:double) -> 6:double) -> 5:double, FuncLnDoubleToDouble(col 6)(children: CastDecimalToDouble(col 2) -> 6:double) -> 11:double, FuncLog10DoubleToDouble(col 6)(children: CastDecimalToDouble(col 2) -> 6:double) -> 12:double, FuncLog2DoubleToDouble(col 6)(children: CastDecimalToDouble(col 2) -> 6:double) -> 13:double, FuncLog2DoubleToDouble(col 6)(children: CastDecimalToDouble(col 14)(children: DecimalColSubtractDecimalScalar(col 2, val 15601) -> 14:decimal(21,10)) -> 6:double) -> 15:double, VectorUDFAdaptor(log(2, cdecimal1)) -> 6:double, VectorUDFAdaptor(power(log2(cdecimal1), 2))(children: FuncLog2DoubleToDouble(col 16)(children: CastDecimalToDouble(col 2) -> 16:double) -> 17:double) -> 16:double, VectorUDFAdaptor(power(log2(cdecimal1), 2))(children: FuncLog2DoubleToDouble(col 17)(children: CastDecimalToDouble(col 2) -> 17:double) -> 18:double) -> 17:double, FuncSqrtDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 19:double, FuncAbsDecimalToDecimal(col 2) -> 20:decimal(20,10), FuncSinDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 21:double, FuncASinDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 22:double, FuncCosDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 23:double, FuncACosDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 24:double, FuncATanDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 25:double, FuncDegreesDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 26:double, FuncRadiansDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 27:double, FuncNegateDecimalToDecimal(col 2) -> 28:decimal(20,10), FuncSignDecimalToLong(col 2) -> 4:int, FuncCosDoubleToDouble(col 18)(children: DoubleColAddDoubleScalar(col 29, val 3.14159)(children: DoubleColUnaryMinus(col 18)(children: FuncSinDoubleToDouble(col 29)(children: FuncLnDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 29:double) -> 18:double) -> 29:double) -> 18:double) -> 29:double
11:double, FuncLog10DoubleToDouble(col 6)(children: CastDecimalToDouble(col 2) -> 6:double) -> 12:double, FuncLog2DoubleToDouble(col 6)(children: CastDecimalToDouble(col 2) -> 6:double) -> 13:double, FuncLog2DoubleToDouble(col 6)(children: CastDecimalToDouble(col 14)(children: DecimalColSubtractDecimalScalar(col 2, val 15601) -> 14:decimal(21,10)) -> 6:double) -> 15:double, VectorUDFAdaptor(log(2, cdecimal1)) -> 6:double, VectorUDFAdaptor(power(log2(cdecimal1), 2))(children: FuncLog2DoubleToDouble(col 16)(children: CastDecimalToDouble(col 2) -> 16:double) -> 17:double) -> 16:double, VectorUDFAdaptor(power(log2(cdecimal1), 2))(children: FuncLog2DoubleToDouble(col 17)(children: CastDecimalToDouble(col 2) -> 17:double) -> 18:double) -> 17:double, FuncSqrtDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 19:double, FuncAbsDecimalToDecimal(col 2) -> 20:decimal(20,10), FuncSinDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 21:double, FuncASinDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 22:double, FuncCosDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 23:double, FuncACosDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 24:double, FuncATanDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 25:double, FuncDegreesDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 26:double, FuncRadiansDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 27:double, FuncNegateDecimalToDecimal(col 2) -> 28:decimal(20,10), FuncSignDecimalToLong(col 2) -> 4:int, FuncCosDoubleToDouble(col 18)(children: DoubleColAddDoubleScalar(col 29, val 3.14159)(children: DoubleColUnaryMinus(col 18)(children: FuncSinDoubleToDouble(col 29)(children: FuncLnDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 29:double) -> 18:double) -> 29:double) -> 18:double) -> 29:double + Statistics: Num rows: 2048 Data size: 366958 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2048 Data size: 366958 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: decimal_test - Filter Operator - predicate: (((cbigint % 500) = 0) and (sin(cdecimal1) >= -1.0)) (type: boolean) - Select Operator - expressions: cdecimal1 (type: decimal(20,10)), round(cdecimal1, 2) (type: decimal(13,2)), round(cdecimal1) (type: decimal(11,0)), floor(cdecimal1) (type: decimal(11,0)), ceil(cdecimal1) (type: decimal(11,0)), round(exp(cdecimal1), 58) (type: double), ln(cdecimal1) (type: double), log10(cdecimal1) (type: double), log2(cdecimal1) (type: double), log2((cdecimal1 - 15601)) (type: double), log(2, cdecimal1) (type: double), power(log2(cdecimal1), 2) (type: double), power(log2(cdecimal1), 2) (type: double), 
sqrt(cdecimal1) (type: double), abs(cdecimal1) (type: decimal(20,10)), sin(cdecimal1) (type: double), asin(cdecimal1) (type: double), cos(cdecimal1) (type: double), acos(cdecimal1) (type: double), atan(cdecimal1) (type: double), degrees(cdecimal1) (type: double), radians(cdecimal1) (type: double), cdecimal1 (type: decimal(20,10)), (- cdecimal1) (type: decimal(20,10)), sign(cdecimal1) (type: int), cos(((- sin(log(cdecimal1))) + 3.14159)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 - ListSink + ListSink PREHOOK: query: select cdecimal1 diff --git ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out index c16f605..47dc887 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out @@ -545,10 +545,14 @@ NULL NULL 123456789.0123456789 15241578753238836.75019051998750191 1234567890.1234560000 1524157875323881726.87092138393600000 1234567890.1234567890 1524157875323883675.01905199875019052 -PREHOOK: query: EXPLAIN SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -566,12 +570,26 @@ STAGE PLANS: TableScan alias: decimal_precision Statistics: Num rows: 75 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: dec (type: decimal(20,10)) outputColumnNames: dec + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 75 Data size: 3472 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(dec), sum(dec) + Group By Vectorization: + aggregators: VectorUDAFAvgDecimal(col 0) -> struct, VectorUDAFSumDecimal(col 0) -> decimal(38,18) + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 0) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE @@ -581,8 +599,21 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: decimal(30,10)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF 
avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), sum(VALUE._col1) diff --git ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out index 8e7cd63..134b008 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out @@ -28,12 +28,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_txt #### A masked pattern was here #### 101 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_txt order by dec PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_txt order by dec POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -62,15 +66,33 @@ STAGE PLANS: value expressions: _col1 (type: decimal(11,0)) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(10,0)), VALUE._col0 (type: decimal(11,0)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -92,12 +114,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_txt #### A masked pattern was here #### 101 100 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_txt order by round(dec, -1) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_txt order by round(dec, -1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -126,15 +152,33 @@ STAGE PLANS: value expressions: _col0 (type: decimal(10,0)) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + 
allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -182,12 +226,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_rc #### A masked pattern was here #### 101 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_rc order by dec PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_rc order by dec POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -216,15 +264,33 @@ STAGE PLANS: value expressions: _col1 (type: decimal(11,0)) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.RCFileInputFormat Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(10,0)), VALUE._col0 (type: decimal(11,0)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -246,12 +312,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_rc #### A masked pattern was here #### 101 100 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_rc order by round(dec, -1) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_rc order by round(dec, -1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -280,15 +350,33 @@ STAGE PLANS: value expressions: _col0 (type: decimal(10,0)) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.RCFileInputFormat Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true 
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -336,12 +424,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_orc #### A masked pattern was here #### 101 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_orc order by dec PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_orc order by dec POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -359,26 +451,61 @@ STAGE PLANS: TableScan alias: decimal_tbl_orc Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: dec (type: decimal(10,0)), round(dec, -1) (type: decimal(11,0)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 1:decimal(11,0) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(10,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(11,0)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(10,0)), VALUE._col0 (type: decimal(11,0)) outputColumnNames: _col0, _col1 + Select Vectorization: + 
className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -400,12 +527,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_orc #### A masked pattern was here #### 101 100 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_orc order by round(dec, -1) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_orc order by round(dec, -1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -423,26 +554,60 @@ STAGE PLANS: TableScan alias: decimal_tbl_orc Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: dec (type: decimal(10,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: round(_col0, -1) (type: decimal(11,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out 
ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out index 29ec85b..8eb892b 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out @@ -24,20 +24,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_1_orc #### A masked pattern was here #### 55555.000000000000000000 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3), round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4), round(dec, -5), round(dec, -6), round(dec, -7), round(dec, -8) FROM decimal_tbl_1_orc ORDER BY d PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3), round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4), round(dec, -5), round(dec, -6), round(dec, -7), round(dec, -8) FROM decimal_tbl_1_orc ORDER BY d POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -55,26 +59,61 @@ STAGE PLANS: TableScan alias: decimal_tbl_1_orc Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: round(dec) (type: decimal(21,0)), round(dec, 0) (type: decimal(21,0)), round(dec, 1) (type: decimal(22,1)), round(dec, 2) (type: decimal(23,2)), round(dec, 3) (type: decimal(24,3)), round(dec, -1) (type: decimal(21,0)), round(dec, -2) (type: decimal(21,0)), round(dec, -3) (type: decimal(21,0)), round(dec, -4) (type: decimal(21,0)), round(dec, -5) (type: decimal(21,0)), round(dec, -6) (type: decimal(21,0)), round(dec, -7) (type: decimal(21,0)), round(dec, -8) (type: decimal(21,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + selectExpressions: FuncRoundDecimalToDecimal(col 0) -> 1:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 3:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 4:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 5:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -5) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -6) -> 11:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -7) -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -8) -> 13:decimal(21,0) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + + Reduce Sink Vectorization: + className: 
VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(22,1)), VALUE._col2 (type: decimal(23,2)), VALUE._col3 (type: decimal(24,3)), VALUE._col4 (type: decimal(21,0)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 (type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(21,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -131,7 +170,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_2_orc #### A masked pattern was here #### 125.315000000000000000 -125.315000000000000000 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(pos) as p, round(pos, 0), round(pos, 1), round(pos, 2), round(pos, 3), round(pos, 4), @@ -141,7 +180,7 @@ SELECT round(neg, -1), round(neg, -2), round(neg, -3), round(neg, -4) FROM decimal_tbl_2_orc ORDER BY p PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(pos) as p, round(pos, 0), round(pos, 1), round(pos, 2), round(pos, 3), round(pos, 4), @@ -151,6 +190,10 @@ SELECT round(neg, -1), round(neg, -2), round(neg, -3), round(neg, -4) FROM decimal_tbl_2_orc ORDER BY p POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage 
Stage-0 depends on stages: Stage-1 @@ -168,26 +211,61 @@ STAGE PLANS: TableScan alias: decimal_tbl_2_orc Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: round(pos) (type: decimal(21,0)), round(pos, 0) (type: decimal(21,0)), round(pos, 1) (type: decimal(22,1)), round(pos, 2) (type: decimal(23,2)), round(pos, 3) (type: decimal(24,3)), round(pos, 4) (type: decimal(25,4)), round(pos, -1) (type: decimal(21,0)), round(pos, -2) (type: decimal(21,0)), round(pos, -3) (type: decimal(21,0)), round(pos, -4) (type: decimal(21,0)), round(neg) (type: decimal(21,0)), round(neg, 0) (type: decimal(21,0)), round(neg, 1) (type: decimal(22,1)), round(neg, 2) (type: decimal(23,2)), round(neg, 3) (type: decimal(24,3)), round(neg, 4) (type: decimal(25,4)), round(neg, -1) (type: decimal(21,0)), round(neg, -2) (type: decimal(21,0)), round(neg, -3) (type: decimal(21,0)), round(neg, -4) (type: decimal(21,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + selectExpressions: FuncRoundDecimalToDecimal(col 0) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 3:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 4:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 5:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 6:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 4) -> 7:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 11:decimal(21,0), FuncRoundDecimalToDecimal(col 1) -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 0) -> 13:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 1) -> 14:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 2) -> 15:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 3) -> 16:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 4) -> 17:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -1) -> 18:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -2) -> 19:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -3) -> 20:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -4) -> 21:decimal(21,0) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values 
IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(25,4)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(22,1)), _col13 (type: decimal(23,2)), _col14 (type: decimal(24,3)), _col15 (type: decimal(25,4)), _col16 (type: decimal(21,0)), _col17 (type: decimal(21,0)), _col18 (type: decimal(21,0)), _col19 (type: decimal(21,0)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(22,1)), VALUE._col2 (type: decimal(23,2)), VALUE._col3 (type: decimal(24,3)), VALUE._col4 (type: decimal(25,4)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 (type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(22,1)), VALUE._col12 (type: decimal(23,2)), VALUE._col13 (type: decimal(24,3)), VALUE._col14 (type: decimal(25,4)), VALUE._col15 (type: decimal(21,0)), VALUE._col16 (type: decimal(21,0)), VALUE._col17 (type: decimal(21,0)), VALUE._col18 (type: decimal(21,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -249,7 +327,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_3_orc #### A masked pattern was here #### 3.141592653589793000 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(dec, -15) as d, round(dec, -16), round(dec, -13), round(dec, -14), @@ -270,7 +348,7 @@ SELECT round(dec, 15), round(dec, 16) FROM decimal_tbl_3_orc ORDER BY d PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(dec, -15) as d, round(dec, -16), round(dec, -13), round(dec, -14), @@ -291,6 +369,10 @@ SELECT round(dec, 15), round(dec, 16) FROM decimal_tbl_3_orc ORDER BY d POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -308,26 +390,61 @@ STAGE PLANS: TableScan alias: decimal_tbl_3_orc Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: round(dec, -15) (type: decimal(21,0)), round(dec, -16) (type: decimal(21,0)), round(dec, -5) (type: decimal(21,0)), round(dec, -6) (type: decimal(21,0)), round(dec, -3) (type: decimal(21,0)), round(dec, -4) (type: decimal(21,0)), round(dec, -1) (type: decimal(21,0)), round(dec, -2) (type: decimal(21,0)), round(dec, 0) (type: decimal(21,0)), round(dec, 1) (type: decimal(22,1)), round(dec, 2) (type: decimal(23,2)), round(dec, 3) (type: decimal(24,3)), round(dec, -13) (type: decimal(21,0)), round(dec, 4) (type: decimal(25,4)), round(dec, 5) (type: decimal(26,5)), round(dec, 6) (type: decimal(27,6)), round(dec, 7) (type: decimal(28,7)), round(dec, 8) (type: decimal(29,8)), round(dec, 9) (type: decimal(30,9)), round(dec, 10) (type: decimal(31,10)), round(dec, 11) (type: decimal(32,11)), round(dec, 12) (type: decimal(33,12)), round(dec, 13) (type: decimal(34,13)), round(dec, -14) (type: decimal(21,0)), round(dec, 14) (type: decimal(35,14)), round(dec, 15) (type: decimal(36,15)), round(dec, 16) (type: decimal(37,16)), round(dec, -11) (type: decimal(21,0)), round(dec, -12) (type: decimal(21,0)), round(dec, -9) (type: decimal(21,0)), round(dec, -10) (type: decimal(21,0)), round(dec, -7) (type: decimal(21,0)), round(dec, -8) (type: decimal(21,0)) outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col2, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col3, _col31, _col32, _col33, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] + selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -15) -> 1:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -16) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -5) -> 3:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -6) -> 4:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 5:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 10:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 11:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 12:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -13) -> 13:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 4) -> 14:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 5) -> 15:decimal(26,5), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 6) -> 16:decimal(27,6), FuncRoundWithNumDigitsDecimalToDecimal(col 0, 
decimalPlaces 7) -> 17:decimal(28,7), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 8) -> 18:decimal(29,8), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 9) -> 19:decimal(30,9), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 10) -> 20:decimal(31,10), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 11) -> 21:decimal(32,11), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 12) -> 22:decimal(33,12), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 13) -> 23:decimal(34,13), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -14) -> 24:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 14) -> 25:decimal(35,14), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 15) -> 26:decimal(36,15), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 16) -> 27:decimal(37,16), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -11) -> 28:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -12) -> 29:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -9) -> 30:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -10) -> 31:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -7) -> 32:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -8) -> 33:decimal(21,0) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(21,0)), _col3 (type: decimal(21,0)), _col4 (type: decimal(21,0)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)), _col13 (type: decimal(21,0)), _col14 (type: decimal(21,0)), _col15 (type: decimal(21,0)), _col16 (type: decimal(21,0)), _col17 (type: decimal(22,1)), _col18 (type: decimal(23,2)), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,4)), _col21 (type: decimal(26,5)), _col22 (type: decimal(27,6)), _col23 (type: decimal(28,7)), _col24 (type: decimal(29,8)), _col25 (type: decimal(30,9)), _col26 (type: decimal(31,10)), _col27 (type: decimal(32,11)), _col28 (type: decimal(33,12)), _col29 (type: decimal(34,13)), _col31 (type: decimal(35,14)), _col32 (type: decimal(36,15)), _col33 (type: decimal(37,16)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(21,0)), VALUE._col2 (type: decimal(21,0)), VALUE._col3 (type: decimal(21,0)), VALUE._col4 (type: decimal(21,0)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 (type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(21,0)), VALUE._col12 (type: decimal(21,0)), VALUE._col13 (type: decimal(21,0)), VALUE._col14 (type: decimal(21,0)), VALUE._col15 (type: decimal(21,0)), VALUE._col16 (type: decimal(22,1)), VALUE._col17 (type: decimal(23,2)), VALUE._col18 (type: decimal(24,3)), VALUE._col19 (type: decimal(25,4)), VALUE._col20 (type: decimal(26,5)), VALUE._col21 (type: decimal(27,6)), VALUE._col22 (type: decimal(28,7)), VALUE._col23 (type: decimal(29,8)), VALUE._col24 (type: decimal(30,9)), VALUE._col25 (type: decimal(31,10)), VALUE._col26 (type: decimal(32,11)), VALUE._col27 (type: decimal(33,12)), VALUE._col28 (type: decimal(34,13)), VALUE._col28 (type: decimal(34,13)), VALUE._col29 (type: decimal(35,14)), VALUE._col30 (type: decimal(36,15)), VALUE._col31 (type: decimal(37,16)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30, 31, 32] Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -412,14 +529,18 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_4_orc #### A masked pattern was here #### 1809242.315111134400000000 -1809242.315111134400000000 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(pos, 9) as p, round(neg, 9), round(1809242.3151111344BD, 9), round(-1809242.3151111344BD, 9) FROM decimal_tbl_4_orc ORDER BY p PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(pos, 9) as p, round(neg, 9), round(1809242.3151111344BD, 9), round(-1809242.3151111344BD, 9) FROM decimal_tbl_4_orc ORDER BY p POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -437,26 +558,62 @@ STAGE PLANS: TableScan alias: decimal_tbl_4_orc Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: round(pos, 9) (type: decimal(30,9)), round(neg, 9) (type: decimal(30,9)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumns: [2, 3] + selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 9) -> 2:decimal(30,9), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 9) -> 3:decimal(30,9) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(30,9)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(30,9)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(30,9)), VALUE._col0 (type: decimal(30,9)), 1809242.315111134 (type: decimal(17,9)), -1809242.315111134 (type: decimal(17,9)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] + selectExpressions: ConstantVectorExpression(val 1809242.315111134) -> 2:decimal(17,9), ConstantVectorExpression(val -1809242.315111134) -> 3:decimal(17,9) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out index 5ea9f4d..e00de78 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out @@ -48,14 +48,18 @@ POSTHOOK: Input: default@decimal_udf2_txt POSTHOOK: Output: default@decimal_udf2 POSTHOOK: Lineage: decimal_udf2.key SIMPLE [(decimal_udf2_txt)decimal_udf2_txt.FieldSchema(name:key, type:decimal(20,10), comment:null), ] POSTHOOK: Lineage: decimal_udf2.value SIMPLE [(decimal_udf2_txt)decimal_udf2_txt.FieldSchema(name:value, type:int, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2 WHERE key = 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2 WHERE key = 10 POSTHOOK: type: 
QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -70,15 +74,30 @@ STAGE PLANS: TableScan alias: decimal_udf2 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColEqualDecimalScalar(col 0, val 10) -> boolean predicate: (key = 10) (type: boolean) Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: NaN (type: double), NaN (type: double), 1.4711276743037347 (type: double), -0.8390715290764524 (type: double), -0.5440211108893698 (type: double), 0.6483608274590866 (type: double), 0.17453292519943295 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8] + selectExpressions: ConstantVectorExpression(val NaN) -> 2:double, ConstantVectorExpression(val NaN) -> 3:double, ConstantVectorExpression(val 1.4711276743037347) -> 4:double, ConstantVectorExpression(val -0.8390715290764524) -> 5:double, ConstantVectorExpression(val -0.5440211108893698) -> 6:double, ConstantVectorExpression(val 0.6483608274590866) -> 7:double, ConstantVectorExpression(val 0.17453292519943295) -> 8:double Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -86,6 +105,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -104,20 +131,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf2 #### A masked pattern was here #### NaN NaN 1.4711276743037347 -0.8390715290764524 -0.5440211108893698 0.6483608274590866 0.17453292519943295 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF2 WHERE key = 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF2 WHERE key = 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -132,15 +163,30 @@ STAGE PLANS: TableScan alias: decimal_udf2 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + 
native: true + predicateExpression: FilterDecimalColEqualDecimalScalar(col 0, val 10) -> boolean predicate: (key = 10) (type: boolean) Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9] + selectExpressions: ConstantVectorExpression(val 22026.465794806718) -> 2:double, ConstantVectorExpression(val 2.302585092994046) -> 3:double, ConstantVectorExpression(val 2.302585092994046) -> 4:double, ConstantVectorExpression(val 1.0) -> 5:double, FuncLogWithBaseLongToDouble(col 1) -> 6:double, VectorUDFAdaptor(log(value, 10)) -> 7:double, ConstantVectorExpression(val 1.0) -> 8:double, ConstantVectorExpression(val 3.1622776601683795) -> 9:double Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -148,6 +194,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out index 620b99e..698cc9c 100644 --- ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select distinct s, t from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select distinct s, t from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -124,11 +128,24 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: t, s + Select 
Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 8] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 8 + native: false + projectedOutputColumns: [] keys: t (type: tinyint), s (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -137,13 +154,38 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 @@ -151,9 +193,16 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: tinyint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_elt.q.out ql/src/test/results/clientpositive/llap/vector_elt.q.out index bb66867..44ba6de 100644 --- ql/src/test/results/clientpositive/llap/vector_elt.q.out +++ ql/src/test/results/clientpositive/llap/vector_elt.q.out @@ -1,29 +1,80 @@ -PREHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc WHERE ctinyint > 0 LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc WHERE ctinyint 
> 0 LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 935842 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean + predicate: (ctinyint > 0) (type: boolean) + Statistics: Num rows: 4096 Data size: 312018 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [13, 6, 2, 16] + selectExpressions: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 13:long, VectorElt(columns [14, 6, 15])(children: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 14:long, col 6, CastLongToString(col 2) -> 15:String) -> 16:string + Statistics: Num rows: 4096 Data size: 1069830 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 2664 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 2664 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: (ctinyint > 0) (type: boolean) - Select Operator - expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Limit - Number of rows: 10 - ListSink + ListSink PREHOOK: query: SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc @@ -47,7 +98,7 @@ POSTHOOK: Input: default@alltypesorc 1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 2 cvLH6Eat2yFsyy7p 528534767 528534767 1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 
'ee', 'ff', 'gg'), elt('1', 'abc', 'defg'), @@ -60,7 +111,7 @@ SELECT elt(2, 'abc', 'defg'), elt(3, 'abc', 'defg') FROM alltypesorc LIMIT 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), elt('1', 'abc', 'defg'), @@ -73,22 +124,68 @@ SELECT elt(2, 'abc', 'defg'), elt(3, 'abc', 'defg') FROM alltypesorc LIMIT 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Select Operator + expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: string), null (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + selectExpressions: ConstantVectorExpression(val defg) -> 12:string, ConstantVectorExpression(val cc) -> 13:string, ConstantVectorExpression(val abc) -> 14:string, ConstantVectorExpression(val 2) -> 15:string, ConstantVectorExpression(val 12345) -> 16:string, ConstantVectorExpression(val 123456789012) -> 17:string, ConstantVectorExpression(val 1.25) -> 18:string, ConstantVectorExpression(val 16.0) -> 19:string, ConstantVectorExpression(val null) -> 20:string, ConstantVectorExpression(val null) -> 21:string + Statistics: Num rows: 12288 Data size: 8687784 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: - TableScan - alias: alltypesorc - Select Operator - expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: string), null (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Limit - Number of rows: 1 - ListSink + ListSink 
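Editorial note: the two vector_elt plans above show elt() both as a real vector expression (VectorElt, with cint routed through CastLongToString) and fully constant-folded when all arguments are literals. A minimal sketch of the contract those folded constants reflect, assuming Hive's documented elt() semantics (1-based index, NULL when the index is out of range); this is a hypothetical helper, not the generated class:

// Minimal sketch of elt() semantics: pick the n-th string, 1-based.
// Non-string arguments such as cint reach it already stringified, per the
// CastLongToString child expression in the plan above.
public class EltSketch {
  static String elt(Integer n, String... args) {
    if (n == null || n < 1 || n > args.length) {
      return null; // matches the folded "null (type: string)" columns above
    }
    return args[n - 1];
  }
}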
PREHOOK: query: SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), diff --git ql/src/test/results/clientpositive/llap/vector_empty_where.q.out ql/src/test/results/clientpositive/llap/vector_empty_where.q.out new file mode 100644 index 0000000..9f93f86 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_empty_where.q.out @@ -0,0 +1,648 @@ +PREHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where cstring1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where cstring1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 13)(children: CastLongToBooleanViaLongToLong(col 12)(children: StringLength(col 6) -> 12:Long) -> 13:long) -> boolean + predicate: cstring1 (type: string) + Statistics: Num rows: 6144 Data size: 449620 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cint (type: int) + outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + Statistics: Num rows: 6144 Data size: 449620 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2 + native: false + projectedOutputColumns: [] + keys: cint (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false 
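Editorial note: vector_empty_where.q.out is a new golden file showing how a bare non-boolean WHERE column is vectorized, and how count(distinct) runs as two aggregation stages (a group-by on the key in Reducer 2, then VectorUDAFCount merged by VectorUDAFCountMerge in Reducer 3). For "where cstring1" the plan builds the chain StringLength -> CastLongToBooleanViaLongToLong -> SelectColumnIsTrue. A sketch of that coercion under the assumption that a string counts as true when its length is nonzero and that NULL rows never pass the filter:

// Sketch of the per-row predicate the plan above compiles for "where cstring1".
public class BareColumnPredicateSketch {
  static boolean rowSelected(String cstring1) {
    if (cstring1 == null) {
      return false;                       // assumed: nulls do not pass
    }
    long len = cstring1.length();         // StringLength(col 6) -> 12:Long
    long asBool = (len == 0) ? 0L : 1L;   // CastLongToBooleanViaLongToLong
    return asBool == 1L;                  // SelectColumnIsTrue(col 13)
  }
}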
+ vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count (distinct cint) from alltypesorc where cstring1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct cint) from alltypesorc where cstring1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +6041 +PREHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where cint +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where cint +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + 
Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 12)(children: CastLongToBooleanViaLongToLong(col 2) -> 12:long) -> boolean + predicate: cint (type: int) + Statistics: Num rows: 6144 Data size: 18348 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2 + native: false + projectedOutputColumns: [] + keys: cint (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + 
Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count (distinct cint) from alltypesorc where cint +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct cint) from alltypesorc where cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +6082 +PREHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where cfloat +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where cfloat +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 12)(children: CastDoubleToBooleanViaDoubleToLong(col 4) -> 12:long) -> boolean + predicate: cfloat (type: float) + Statistics: Num rows: 6144 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cint (type: int) + outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + Statistics: Num rows: 6144 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2 + native: false + projectedOutputColumns: [] + keys: cint (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: 
VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + 
Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count (distinct cint) from alltypesorc where cfloat +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct cint) from alltypesorc where cfloat +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3022 +PREHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where ctimestamp1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where ctimestamp1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 528216 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 12)(children: CastTimestampToBoolean(col 8) -> 12:long) -> boolean + predicate: ctimestamp1 (type: timestamp) + Statistics: Num rows: 6144 Data size: 264108 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cint (type: int) + outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + Statistics: Num rows: 6144 Data size: 264108 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2 + native: false + projectedOutputColumns: [] + keys: cint (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count (distinct cint) from alltypesorc where ctimestamp1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct cint) from alltypesorc where ctimestamp1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3022 diff --git ql/src/test/results/clientpositive/llap/vector_groupby4.q.out ql/src/test/results/clientpositive/llap/vector_groupby4.q.out index 9ecfa56..ad312cf 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby4.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby4.q.out @@ -18,14 +18,18 @@ POSTHOOK: query: CREATE TABLE dest1(c1 STRING) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) PREHOOK: type: 
QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -46,21 +50,55 @@ STAGE PLANS: TableScan alias: srcorc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: substr(key, 1, 1) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: StringSubstrColStartLen(col 0, start 0, length 1) -> 2:string Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0 @@ -69,17 +107,37 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + 
vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_groupby6.q.out ql/src/test/results/clientpositive/llap/vector_groupby6.q.out index 4c6e038..b9e7c65 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby6.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby6.q.out @@ -18,14 +18,18 @@ POSTHOOK: query: CREATE TABLE dest1(c1 STRING) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -46,21 +50,55 @@ STAGE PLANS: TableScan alias: srcorc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: substr(value, 5, 1) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: StringSubstrColStartLen(col 1, start 4, length 1) -> 2:string Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + 
projectedOutputColumns: [] keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0 @@ -69,17 +107,37 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out index 742cebc..80a5696 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s, t, max(b) from vectortab2korc group by s, t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select s, t, max(b) from vectortab2korc group by s, t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -124,12 +128,26 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: t (type: tinyint), s (type: string), b (type: bigint) outputColumnNames: t, s, b + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 8, 3] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(b) + Group By Vectorization: + 
aggregators: VectorUDAFMaxLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 8 + native: false + projectedOutputColumns: [0] keys: t (type: tinyint), s (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -138,15 +156,41 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -154,9 +198,16 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: tinyint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out index a0a3393..249f1f3 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out @@ -1,74 +1,241 @@ Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from src where not key in (select key from src) order by key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from src where not key in (select key from src) order by key POSTHOOK: type: QUERY -Plan optimized by CBO. 
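Editorial note: in the vector_groupby4/6 plans above, substr(key,1,1) and substr(value,5,1) compile to StringSubstrColStartLen with "start 0" and "start 4": the planner shifts SQL's 1-based start to the expression's 0-based offset. The same plans also show why the first-stage Reduce Sink stays non-native: it partitions on rand(), which fails the "Uniform Hash" condition. A sketch of the index shift, with the empty-string result for an out-of-range start being an assumption rather than confirmed behavior:

// Hypothetical helper, not the generated class: SQL substr(col, 5, 1)
// arrives here as start0 = 4, len = 1, per the plan above.
public class SubstrStartLenSketch {
  static String substrStartLen(String s, int start0, int len) {
    if (s == null || start0 >= s.length()) {
      return ""; // assumed out-of-range result
    }
    return s.substring(start0, Math.min(start0 + len, s.length()));
  }
}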
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Map 1 <- Reducer 4 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Map 5 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_37] - Select Operator [SEL_36] (rows=500 width=178) - Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] - Select Operator [SEL_21] (rows=500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_20] (rows=500 width=198) - predicate:((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) - Map Join Operator [MAPJOIN_28] (rows=500 width=198) - Conds:MAPJOIN_27._col0=RS_35._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5"] - <-Reducer 6 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_35] - PartitionCols:_col0 - Select Operator [SEL_34] (rows=205 width=91) - Output:["_col0","_col1"] - Group By Operator [GBY_33] (rows=205 width=87) - Output:["_col0"],keys:KEY._col0 - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - PartitionCols:_col0 - Group By Operator [GBY_10] (rows=205 width=87) - Output:["_col0"],keys:key - TableScan [TS_8] (rows=500 width=87) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Map Join Operator [MAPJOIN_27] (rows=500 width=194) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] - <-Reducer 4 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_32] - Group By Operator [GBY_31] (rows=1 width=16) - Output:["_col0","_col1"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"] - <-Map 3 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_5] - Group By Operator [GBY_4] (rows=1 width=16) - Output:["_col0","_col1"],aggregations:["count()","count(key)"] - Select Operator [SEL_3] (rows=500 width=87) - Output:["key"] - TableScan [TS_2] (rows=500 width=87) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Select Operator [SEL_1] (rows=500 width=178) - Output:["_col0","_col1"] - TableScan [TS_0] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 4 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Reducer 4 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + input vertices: + 1 Reducer 6 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((_col2 = 0) or 
(_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean) + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(key) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + table: + 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint, VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 6 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + selectExpressions: ConstantVectorExpression(val 1) -> 1:long + Statistics: Num rows: 205 Data size: 18655 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 205 Data size: 18655 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink Warning: Map Join MAPJOIN[27][bigTable=?] 
in task 'Map 1' is a cross product PREHOOK: query: select * diff --git ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out index c4bcbab..8599e97 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out @@ -211,7 +211,7 @@ POSTHOOK: Lineage: store_sales.ss_sold_time_sk SIMPLE [(store_sales_txt)store_sa POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_store_sk, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_ticket_number SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ticket_number, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_wholesale_cost SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_wholesale_cost, type:float, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ss_ticket_number from @@ -219,7 +219,7 @@ from group by ss_ticket_number limit 20 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ss_ticket_number from @@ -227,6 +227,10 @@ from group by ss_ticket_number limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -244,11 +248,24 @@ STAGE PLANS: TableScan alias: store_sales Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Select Operator expressions: ss_ticket_number (type: int) outputColumnNames: ss_ticket_number + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [9] Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 9 + native: false + projectedOutputColumns: [] keys: ss_ticket_number (type: int) mode: hash outputColumnNames: _col0 @@ -257,23 +274,55 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: 
true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -324,7 +373,7 @@ POSTHOOK: Input: default@store_sales 18 19 20 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select min(ss_ticket_number) m from @@ -336,7 +385,7 @@ from group by ss_ticket_number order by m PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select min(ss_ticket_number) m from @@ -348,6 +397,10 @@ from group by ss_ticket_number order by m POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -366,11 +419,24 @@ STAGE PLANS: TableScan alias: store_sales Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Select Operator expressions: ss_ticket_number (type: int) outputColumnNames: ss_ticket_number + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [9] Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 9 + native: false + projectedOutputColumns: [] keys: ss_ticket_number (type: int) mode: hash outputColumnNames: _col0 @@ -379,19 +445,51 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(_col0) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> int + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 @@ -399,20 +497,43 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -533,7 +654,7 @@ POSTHOOK: Input: default@store_sales 80 81 82 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ss_ticket_number, sum(ss_item_sk), sum(q) from @@ -545,7 +666,7 @@ from group by ss_ticket_number order by ss_ticket_number PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ss_ticket_number, sum(ss_item_sk), sum(q) from @@ -557,6 +678,10 @@ from group by ss_ticket_number order by ss_ticket_number POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -575,12 +700,26 @@ STAGE PLANS: TableScan alias: store_sales Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Select Operator expressions: ss_ticket_number (type: int), ss_item_sk (type: int), 
ss_quantity (type: int) outputColumnNames: ss_ticket_number, ss_item_sk, ss_quantity + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [9, 2, 10] Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ss_quantity) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 10) -> int + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 9, col 2 + native: false + projectedOutputColumns: [0] keys: ss_ticket_number (type: int), ss_item_sk (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -589,15 +728,42 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 2) -> int + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -605,9 +771,20 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int), _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), sum(_col2) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1) -> bigint, VectorUDAFSumLong(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0, 1] keys: _col1 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2 @@ -615,17 +792,36 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS 
true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -746,7 +942,7 @@ POSTHOOK: Input: default@store_sales 80 151471 704 81 105109 429 82 55611 254 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ss_ticket_number, ss_item_sk, sum(q) from @@ -758,7 +954,7 @@ from group by ss_ticket_number, ss_item_sk order by ss_ticket_number, ss_item_sk PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ss_ticket_number, ss_item_sk, sum(q) from @@ -770,6 +966,10 @@ from group by ss_ticket_number, ss_item_sk order by ss_ticket_number, ss_item_sk POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -788,12 +988,26 @@ STAGE PLANS: TableScan alias: store_sales Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Select Operator expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int) outputColumnNames: ss_ticket_number, ss_item_sk, ss_quantity + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [9, 2, 10] Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ss_quantity) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 10) -> int + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 9, col 2 + native: false + projectedOutputColumns: [0] keys: ss_ticket_number (type: int), ss_item_sk (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -802,15 +1016,41 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 2) -> int + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -818,9 +1058,20 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int), _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [0] keys: _col1 (type: int), _col0 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2 @@ -828,17 +1079,36 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git 
ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out index 77a0695..2176917 100644 --- ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out @@ -123,16 +123,20 @@ POSTHOOK: Lineage: store.s_street_type SIMPLE [(store_txt)store_txt.FieldSchema( POSTHOOK: Lineage: store.s_suite_number SIMPLE [(store_txt)store_txt.FieldSchema(name:s_suite_number, type:string, comment:null), ] POSTHOOK: Lineage: store.s_tax_precentage SIMPLE [(store_txt)store_txt.FieldSchema(name:s_tax_precentage, type:decimal(5,2), comment:null), ] POSTHOOK: Lineage: store.s_zip SIMPLE [(store_txt)store_txt.FieldSchema(name:s_zip, type:string, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s_store_id from store group by s_store_id with rollup PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select s_store_id from store group by s_store_id with rollup POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -166,8 +170,19 @@ STAGE PLANS: Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: GROUPBY operator: Grouping sets not supported + vectorized: false Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: GROUPBY operator: Pruning grouping set id not supported + vectorized: false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int) @@ -208,16 +223,20 @@ AAAAAAAAEAAAAAAA AAAAAAAAHAAAAAAA AAAAAAAAIAAAAAAA AAAAAAAAKAAAAAAA -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s_store_id, GROUPING__ID from store group by s_store_id with rollup PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select s_store_id, GROUPING__ID from store group by s_store_id with rollup POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -251,10 +270,29 @@ STAGE PLANS: Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: GROUPBY operator: Grouping sets not supported + vectorized: false Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: 
VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 @@ -262,9 +300,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_if_expr.q.out ql/src/test/results/clientpositive/llap/vector_if_expr.q.out index 555340d..45cf8e6 100644 --- ql/src/test/results/clientpositive/llap/vector_if_expr.q.out +++ ql/src/test/results/clientpositive/llap/vector_if_expr.q.out @@ -1,9 +1,13 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, IF (cboolean1, 'first', 'second') FROM alltypesorc WHERE cboolean1 IS NOT NULL AND cboolean1 ORDER BY cboolean1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, IF (cboolean1, 'first', 'second') FROM alltypesorc WHERE cboolean1 IS NOT NULL AND cboolean1 ORDER BY cboolean1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -21,29 +25,68 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsTrue(col 10) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean predicate: (cboolean1 and cboolean1 is not null) (type: boolean) Statistics: Num rows: 4587 Data size: 13704 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), if(cboolean1, 'first', 'second') (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 12] + selectExpressions: IfExprStringScalarStringScalar(col 10, val first, val second) -> 12:String Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map 
Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out index c3c5773..0ebc3fd 100644 --- ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out +++ ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out @@ -163,16 +163,20 @@ POSTHOOK: Lineage: customer_demographics.cd_gender SIMPLE [(customer_demographic POSTHOOK: Lineage: customer_demographics.cd_marital_status SIMPLE [(customer_demographics_txt)customer_demographics_txt.FieldSchema(name:cd_marital_status, type:string, comment:null), ] POSTHOOK: Lineage: customer_demographics.cd_purchase_estimate SIMPLE [(customer_demographics_txt)customer_demographics_txt.FieldSchema(name:cd_purchase_estimate, type:int, comment:null), ] Warning: Map Join MAPJOIN[13][bigTable=store_sales] in task 'Map 2' is a cross product -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(1) from customer_demographics,store_sales where ((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'M') or (customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'U')) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(1) from customer_demographics,store_sales where ((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'M') or (customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'U')) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -191,53 +195,119 @@ STAGE PLANS: TableScan alias: customer_demographics Statistics: Num rows: 200 Data size: 74200 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.vectorized.execution.reducesink.new.enabled IS false, Uniform Hash IS false Statistics: Num rows: 200 Data size: 74200 Basic stats: COMPLETE Column stats: NONE value expressions: cd_demo_sk (type: int), cd_marital_status (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: store_sales Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Map Join Operator condition map: Inner Join 0 to 1 keys: 0 1 + Map Join Vectorization: + className: VectorMapJoinInnerMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col2, _col16 input vertices: 0 Map 1 Statistics: Num rows: 200000 Data size: 92055200 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColEqualLongColumn(col 23, col 4) -> boolean, FilterStringGroupColEqualStringScalar(col 24, val M) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColEqualLongColumn(col 23, col 4) -> boolean, FilterStringGroupColEqualStringScalar(col 24, val U) -> boolean) -> boolean) -> boolean predicate: (((_col0 = _col16) and (_col2 = 'M')) or ((_col0 = _col16) and (_col2 = 'U'))) (type: boolean) Statistics: Num rows: 100000 Data size: 46027600 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 100000 Data size: 46027600 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 25:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.vectorized.execution.reducesink.new.enabled IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_inner_join.q.out ql/src/test/results/clientpositive/llap/vector_inner_join.q.out index d50123d..a854e18 100644 --- ql/src/test/results/clientpositive/llap/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_inner_join.q.out @@ -28,12 +28,16 @@ POSTHOOK: query: insert into table orc_table_2a values(0),(2), (3),(null),(4) POSTHOOK: type: QUERY POSTHOOK: Output: default@orc_table_2a POSTHOOK: Lineage: orc_table_2a.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -51,12 +55,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -64,6 +79,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table 
vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0] outputColumnNames: _col1 input vertices: 1 Map 2 @@ -71,9 +93,16 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -81,25 +110,66 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: c:int + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: t1 Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: a:int + partitionColumnCount: 0 Stage: Stage-0 Fetch Operator @@ -118,12 +188,16 @@ POSTHOOK: Input: default@orc_table_1a POSTHOOK: Input: default@orc_table_2a #### A masked pattern was here #### 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 POSTHOOK: type: QUERY +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -141,12 +215,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -154,12 +239,23 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0] outputColumnNames: _col0 input vertices: 1 Map 2 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -167,19 +263,49 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: c:int + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: t1 Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -188,9 +314,26 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + 
Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: a:int + partitionColumnCount: 0 Stage: Stage-0 Fetch Operator @@ -241,12 +384,16 @@ POSTHOOK: type: QUERY POSTHOOK: Output: default@orc_table_2b POSTHOOK: Lineage: orc_table_2b.c EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: orc_table_2b.v2 SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -264,12 +411,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -277,6 +435,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [2, 0] + smallTableMapping: [2] outputColumnNames: _col1, _col2 input vertices: 1 Map 2 @@ -284,9 +450,16 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 0] 
Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -294,26 +467,68 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: string Map 2 Map Operator Tree: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Stage: Stage-0 Fetch Operator @@ -332,12 +547,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -355,32 
+574,71 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -388,12 +646,24 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [2, 0, 0, 1] + smallTableMapping: [2] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false 
Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -401,6 +671,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: string Stage: Stage-0 Fetch Operator @@ -419,12 +703,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 3 3 THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -442,12 +730,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -455,6 +754,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0, 1, 2, 0] + smallTableMapping: [2] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -462,9 +770,17 @@ STAGE PLANS: Select Operator expressions: _col2 (type: string), (_col3 * 2) (type: int), (_col0 * 5) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 1] + selectExpressions: LongColMultiplyLongScalar(col 0, val 2) -> 3:long, LongColMultiplyLongScalar(col 0, val 5) -> 4:long Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + 
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -442,12 +730,23 @@ STAGE PLANS:
                TableScan
                  alias: t2
                  Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
                    predicate: (c > 2) (type: boolean)
                    Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: c (type: int), v2 (type: string)
                      outputColumnNames: _col0, _col1
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0, 1]
                      Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
                      Map Join Operator
                        condition map:
@@ -455,6 +754,15 @@ STAGE PLANS:
                        keys:
                          0 _col0 (type: int)
                          1 _col1 (type: int)
+                       Map Join Vectorization:
+                           bigTableKeyColumns: [0]
+                           bigTableRetainedColumns: [0, 1]
+                           bigTableValueColumns: [0, 1]
+                           className: VectorMapJoinInnerLongOperator
+                           native: true
+                           nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                           projectedOutputColumns: [0, 1, 2, 0]
+                           smallTableMapping: [2]
                        outputColumnNames: _col0, _col1, _col2, _col3
                        input vertices:
                          1 Map 2
@@ -462,9 +770,17 @@ STAGE PLANS:
                        Select Operator
                          expressions: _col2 (type: string), (_col3 * 2) (type: int), (_col0 * 5) (type: int), _col1 (type: string)
                          outputColumnNames: _col0, _col1, _col2, _col3
+                         Select Vectorization:
+                             className: VectorSelectOperator
+                             native: true
+                             projectedOutputColumns: [2, 3, 4, 1]
+                             selectExpressions: LongColMultiplyLongScalar(col 0, val 2) -> 3:long, LongColMultiplyLongScalar(col 0, val 5) -> 4:long
                          Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                          File Output Operator
                            compressed: false
+                           File Sink Vectorization:
+                               className: VectorFileSinkOperator
+                               native: false
                            Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                            table:
                                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -472,26 +788,68 @@ STAGE PLANS:
                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: c:int, v2:string
+                   partitionColumnCount: 0
+                   scratchColumnTypeNames: string, bigint, bigint
        Map 2 
            Map Operator Tree:
                TableScan
                  alias: t1
                  Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean
                    predicate: (a > 2) (type: boolean)
                    Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: v1 (type: string), a (type: int)
                      outputColumnNames: _col0, _col1
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0, 1]
                      Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col1 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col1 (type: int)
+                       Reduce Sink Vectorization:
+                           className: VectorReduceSinkLongOperator
+                           native: true
+                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                        Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: string)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: true
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: v1:string, a:int
+                   partitionColumnCount: 0
  Stage: Stage-0
    Fetch Operator
@@ -601,12 +1032,16 @@ POSTHOOK: Input: default@orc_table_1b
POSTHOOK: Input: default@orc_table_2b
#### A masked pattern was here ####
three	THREE	3
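Editor's aside: the Map Join Vectorization blocks in these plans report VectorMapJoinInnerLongOperator as native because the single join key is a long-compatible int and the small table loads into an optimized long-keyed hash table. The sketch below shows only the core probe idea under heavily simplified assumptions (a plain HashMap, one small-table value column, no nulls, row-at-a-time instead of whole column vectors); it is not Hive's implementation:

```java
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Toy inner join on a long key: the small table is materialized into a hash
// map once, then every big-table row probes it. Hive's native vectorized map
// join does the same probe against an optimized hash table for whole batches.
public final class ToyLongKeyInnerJoin {
  public static List<String> join(Map<Long, String> smallTable,
                                  long[] bigKeys, String[] bigValues, int size) {
    List<String> out = new ArrayList<>();
    for (int i = 0; i < size; i++) {
      String match = smallTable.get(bigKeys[i]);  // probe on the long key
      if (match != null) {                        // inner join: keep matches only
        out.add(bigValues[i] + "\t" + match);
      }
    }
    return out;
  }

  public static void main(String[] args) {
    Map<Long, String> small = new HashMap<>();
    small.put(3L, "three");                       // small-table side, key 3
    System.out.println(join(small,
        new long[]{3L, 4L}, new String[]{"THREE", "FOUR"}, 2)); // [THREE	three]
  }
}
```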
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -624,12 +1059,23 @@ STAGE PLANS:
                TableScan
                  alias: t2
                  Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
                    predicate: (c > 2) (type: boolean)
                    Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: c (type: int), v2 (type: string)
                      outputColumnNames: _col0, _col1
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0, 1]
                      Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
                      Map Join Operator
                        condition map:
@@ -637,6 +1083,15 @@ STAGE PLANS:
                        keys:
                          0 _col0 (type: int)
                          1 _col1 (type: int)
+                       Map Join Vectorization:
+                           bigTableKeyColumns: [0]
+                           bigTableRetainedColumns: [0, 1]
+                           bigTableValueColumns: [1]
+                           className: VectorMapJoinInnerLongOperator
+                           native: true
+                           nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                           projectedOutputColumns: [1, 2, 0]
+                           smallTableMapping: [2]
                        outputColumnNames: _col1, _col2, _col3
                        input vertices:
                          1 Map 2
@@ -644,9 +1099,16 @@ STAGE PLANS:
                        Select Operator
                          expressions: _col3 (type: int), _col2 (type: string), _col1 (type: string)
                          outputColumnNames: _col0, _col1, _col2
+                         Select Vectorization:
+                             className: VectorSelectOperator
+                             native: true
+                             projectedOutputColumns: [0, 2, 1]
                          Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                          File Output Operator
                            compressed: false
+                           File Sink Vectorization:
+                               className: VectorFileSinkOperator
+                               native: false
                            Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                            table:
                                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -654,26 +1116,68 @@ STAGE PLANS:
                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: c:int, v2:string
+                   partitionColumnCount: 0
+                   scratchColumnTypeNames: string
        Map 2 
            Map Operator Tree:
                TableScan
                  alias: t1
                  Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean
                    predicate: (a > 2) (type: boolean)
                    Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: v1 (type: string), a (type: int)
                      outputColumnNames: _col0, _col1
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0, 1]
                      Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col1 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col1 (type: int)
+                       Reduce Sink Vectorization:
+                           className: VectorReduceSinkLongOperator
+                           native: true
+                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                        Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: string)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: true
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: v1:string, a:int
+                   partitionColumnCount: 0
  Stage: Stage-0
    Fetch Operator
@@ -692,12 +1196,16 @@ POSTHOOK: Input: default@orc_table_1b
POSTHOOK: Input: default@orc_table_2b
#### A masked pattern was here ####
3	three	THREE
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -715,32 +1223,71 @@ STAGE PLANS:
                TableScan
                  alias: t1
                  Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean
                    predicate: (a > 2) (type: boolean)
                    Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: v1 (type: string), a (type: int)
                      outputColumnNames: _col0, _col1
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0, 1]
                      Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col1 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col1 (type: int)
+                       Reduce Sink Vectorization:
+                           className: VectorReduceSinkLongOperator
+                           native: true
+                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                        Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: string)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: true
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: v1:string, a:int
+                   partitionColumnCount: 0
        Map 2 
            Map Operator Tree:
                TableScan
                  alias: t2
                  Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
                    predicate: (c > 2) (type: boolean)
                    Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: c (type: int), v2 (type: string)
                      outputColumnNames: _col0, _col1
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0, 1]
                      Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
                      Map Join Operator
                        condition map:
@@ -748,6 +1295,15 @@ STAGE PLANS:
                        keys:
                          0 _col1 (type: int)
                          1 _col0 (type: int)
+                       Map Join Vectorization:
+                           bigTableKeyColumns: [0]
+                           bigTableRetainedColumns: [0, 1]
+                           bigTableValueColumns: [0, 1]
+                           className: VectorMapJoinInnerLongOperator
+                           native: true
+                           nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                           projectedOutputColumns: [2, 0, 1]
+                           smallTableMapping: [2]
                        outputColumnNames: _col0, _col2, _col3
                        input vertices:
                          0 Map 1
@@ -755,9 +1311,16 @@ STAGE PLANS:
                        Select Operator
                          expressions: _col0 (type: string), _col3 (type: string), _col2 (type: int)
                          outputColumnNames: _col0, _col1, _col2
+                         Select Vectorization:
+                             className: VectorSelectOperator
+                             native: true
+                             projectedOutputColumns: [2, 1, 0]
                          Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                          File Output Operator
                            compressed: false
+                           File Sink Vectorization:
+                               className: VectorFileSinkOperator
+                               native: false
                            Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                            table:
                                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -765,6 +1328,20 @@ STAGE PLANS:
                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: c:int, v2:string
+                   partitionColumnCount: 0
+                   scratchColumnTypeNames: string
  Stage: Stage-0
    Fetch Operator
@@ -783,12 +1360,16 @@ POSTHOOK: Input: default@orc_table_1b
POSTHOOK: Input: default@orc_table_2b
#### A masked pattern was here ####
three	THREE	3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -806,32 +1387,71 @@ STAGE PLANS:
                TableScan
                  alias: t1
                  Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean
                    predicate: (a > 2) (type: boolean)
                    Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: v1 (type: string), a (type: int)
                      outputColumnNames: _col0, _col1
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0, 1]
                      Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col1 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col1 (type: int)
+                       Reduce Sink Vectorization:
+                           className: VectorReduceSinkLongOperator
+                           native: true
+                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                        Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: string)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: true
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: v1:string, a:int
+                   partitionColumnCount: 0
        Map 2 
            Map Operator Tree:
                TableScan
                  alias: t2
                  Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
                    predicate: (c > 2) (type: boolean)
                    Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: c (type: int), v2 (type: string)
                      outputColumnNames: _col0, _col1
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0, 1]
                      Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
                      Map Join Operator
                        condition map:
@@ -839,6 +1459,15 @@ STAGE PLANS:
                        keys:
                          0 _col1 (type: int)
                          1 _col0 (type: int)
+                       Map Join Vectorization:
+                           bigTableKeyColumns: [0]
+                           bigTableRetainedColumns: [0, 1]
+                           bigTableValueColumns: [1]
+                           className: VectorMapJoinInnerLongOperator
+                           native: true
+                           nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                           projectedOutputColumns: [2, 0, 1]
+                           smallTableMapping: [2]
                        outputColumnNames: _col0, _col1, _col3
                        input vertices:
                          0 Map 1
@@ -846,9 +1475,16 @@ STAGE PLANS:
                        Select Operator
                          expressions: _col1 (type: int), _col0 (type: string), _col3 (type: string)
                          outputColumnNames: _col0, _col1, _col2
+                         Select Vectorization:
+                             className: VectorSelectOperator
+                             native: true
+                             projectedOutputColumns: [0, 2, 1]
                          Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                          File Output Operator
                            compressed: false
+                           File Sink Vectorization:
+                               className: VectorFileSinkOperator
+                               native: false
                            Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                            table:
                                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -856,6 +1492,20 @@ STAGE PLANS:
                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: c:int, v2:string
+                   partitionColumnCount: 0
+                   scratchColumnTypeNames: string
  Stage: Stage-0
    Fetch Operator
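Editor's aside: the rowBatchContext blocks printed by explain vectorization detail describe the physical layout of each map task's VectorizedRowBatch: the table's data columns come first, and the scratch columns listed under scratchColumnTypeNames are appended after them for intermediate expression results (for instance the earlier plan with scratchColumnTypeNames: string, bigint, bigint). The following is only a schematic of that numbering, not Hive's VectorizedRowBatch:

```java
// Schematic of the column numbering summarized by rowBatchContext above:
// data columns occupy the low column numbers, scratch columns follow.
public final class ToyRowBatchContext {
  public static void main(String[] args) {
    String[] dataColumns = {"c:int", "v2:string"};           // dataColumnCount: 2
    String[] scratchTypes = {"string", "bigint", "bigint"};  // scratchColumnTypeNames

    int col = 0;
    for (String d : dataColumns) {
      System.out.println("col " + (col++) + " -> data    " + d);
    }
    for (String s : scratchTypes) {
      System.out.println("col " + (col++) + " -> scratch " + s);
    }
    // An expression such as LongColMultiplyLongScalar(col 0, val 2) -> 3:long
    // writes its result into one of these appended scratch columns.
  }
}
```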
diff --git ql/src/test/results/clientpositive/llap/vector_interval_1.q.out ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
index 207e9bb..6da29e0 100644
--- ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
+++ ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
@@ -38,20 +38,24 @@ POSTHOOK: Lineage: vector_interval_1.dt EXPRESSION []
POSTHOOK: Lineage: vector_interval_1.str1 EXPRESSION []
POSTHOOK: Lineage: vector_interval_1.str2 EXPRESSION []
POSTHOOK: Lineage: vector_interval_1.ts EXPRESSION []
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select str1, interval '1-2' year to month, interval_year_month(str1), interval '1 2:3:4' day to second, interval_day_time(str2) from vector_interval_1 order by str1
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select str1, interval '1-2' year to month, interval_year_month(str1), interval '1 2:3:4' day to second, interval_day_time(str2) from vector_interval_1 order by str1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -69,26 +73,62 @@ STAGE PLANS:
                TableScan
                  alias: vector_interval_1
                  Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1, 2, 3]
                  Select Operator
                    expressions: str1 (type: string), CAST( str1 AS INTERVAL YEAR TO MONTH) (type: interval_year_month), CAST( str2 AS INTERVAL DAY TO SECOND) (type: interval_day_time)
                    outputColumnNames: _col0, _col2, _col4
+                   Select Vectorization:
+                       className: VectorSelectOperator
+                       native: true
+                       projectedOutputColumns: [2, 4, 5]
+                       selectExpressions: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month, CastStringToIntervalDayTime(col 3) -> 5:interval_day_time
                    Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col0 (type: string)
                      sort order: +
+                     Reduce Sink Vectorization:
+                         className: VectorReduceSinkOperator
+                         native: false
+                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                         nativeConditionsNotMet: Uniform Hash IS false
                      Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                      value expressions: _col2 (type: interval_year_month), _col4 (type: interval_day_time)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string), 1-2 (type: interval_year_month), VALUE._col0 (type: interval_year_month), 1 02:03:04.000000000 (type: interval_day_time), VALUE._col1 (type: interval_day_time)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0, 3, 1, 4, 2]
+                   selectExpressions: ConstantVectorExpression(val 14) -> 3:long, ConstantVectorExpression(val 1 02:03:04.000000000) -> 4:interval_day_time
                Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -119,7 +159,7 @@ POSTHOOK: Input: default@vector_interval_1
#### A masked pattern was here ####
NULL	1-2	NULL	1 02:03:04.000000000	NULL
1-2	1-2	1-2	1 02:03:04.000000000	1 02:03:04.000000000
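Editor's aside: in the reducer above the literal interval '1-2' comes back as ConstantVectorExpression(val 14) — year-month intervals are vectorized as a single long holding total months, and 1 year + 2 months is 14. A small sketch of that encoding (hand-rolled parser and formatter, illustrative only; the sum 2-4 shows up in the next query's results):

```java
// Year-month intervals vectorize as total months in a long column; the plan
// above renders interval '1-2' as the constant 14.
public final class ToyYearMonthInterval {
  // Parses "Y-M" into total months, e.g. "1-2" -> 14.
  static long toMonths(String ym) {
    String[] parts = ym.split("-");
    return Long.parseLong(parts[0]) * 12 + Long.parseLong(parts[1]);
  }

  // Formats total months back as "Y-M".
  static String fromMonths(long months) {
    return (months / 12) + "-" + (months % 12);
  }

  public static void main(String[] args) {
    long m = toMonths("1-2");
    System.out.println(m);                  // 14
    System.out.println(fromMonths(m + m));  // 2-4
  }
}
```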
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
  dt,
  interval '1-2' year to month + interval '1-2' year to month,
@@ -130,7 +170,7 @@ select
  interval '1-2' year to month - interval_year_month(str1)
from vector_interval_1 order by dt
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
  dt,
  interval '1-2' year to month + interval '1-2' year to month,
@@ -141,6 +181,10 @@ select
  interval '1-2' year to month - interval_year_month(str1)
from vector_interval_1 order by dt
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -158,26 +202,62 @@ STAGE PLANS:
                TableScan
                  alias: vector_interval_1
                  Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1, 2, 3]
                  Select Operator
                    expressions: dt (type: date), (CAST( str1 AS INTERVAL YEAR TO MONTH) + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (1-2 + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (CAST( str1 AS INTERVAL YEAR TO MONTH) - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (1-2 - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month)
                    outputColumnNames: _col0, _col2, _col3, _col5, _col6
+                   Select Vectorization:
+                       className: VectorSelectOperator
+                       native: true
+                       projectedOutputColumns: [1, 6, 5, 8, 7]
+                       selectExpressions: IntervalYearMonthColAddIntervalYearMonthColumn(col 4, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:long, IntervalYearMonthScalarAddIntervalYearMonthColumn(val 14, col 4)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month) -> 5:long, IntervalYearMonthColSubtractIntervalYearMonthColumn(col 4, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 8:long, IntervalYearMonthScalarSubtractIntervalYearMonthColumn(val 14, col 4)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month) -> 7:long
                    Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col0 (type: date)
                      sort order: +
+                     Reduce Sink Vectorization:
+                         className: VectorReduceSinkOperator
+                         native: false
+                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                         nativeConditionsNotMet: Uniform Hash IS false
                      Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                      value expressions: _col2 (type: interval_year_month), _col3 (type: interval_year_month), _col5 (type: interval_year_month), _col6 (type: interval_year_month)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: date), 2-4 (type: interval_year_month), VALUE._col0 (type: interval_year_month), VALUE._col1 (type: interval_year_month), 0-0 (type: interval_year_month), VALUE._col2 (type: interval_year_month), VALUE._col3 (type: interval_year_month)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0, 5, 1, 2, 6, 3, 4]
+                   selectExpressions: ConstantVectorExpression(val 28) -> 5:long, ConstantVectorExpression(val 0) -> 6:long
                Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -216,7 +296,7 @@ POSTHOOK: Input: default@vector_interval_1
#### A masked pattern was here ####
NULL	2-4	NULL	NULL	0-0	NULL	NULL
2001-01-01	2-4	2-4	2-4	0-0	0-0	0-0
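Editor's aside: notation like IntervalYearMonthColAddIntervalYearMonthColumn(col 4, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 4:..., ...) describes a small expression tree: the child expressions run first and leave their results in scratch columns, then the parent reads those columns. A stripped-down sketch of that evaluation order, with made-up minimal interfaces rather than Hive's VectorExpression API:

```java
// Toy model of nested vector expressions: children evaluate first and write
// into scratch columns; the parent then combines those columns, mirroring the
// "(children: ...)" notation in the plan.
public final class ToyNestedExpressions {
  interface VectorExpr { void evaluate(long[][] cols, int size); }

  // Child: "cast" column `in` into scratch column `out` (identity stand-in).
  static VectorExpr cast(int in, int out) {
    return (cols, n) -> { for (int i = 0; i < n; i++) cols[out][i] = cols[in][i]; };
  }

  // Parent: add two (scratch) columns into an output column.
  static VectorExpr add(int left, int right, int out, VectorExpr... children) {
    return (cols, n) -> {
      for (VectorExpr c : children) c.evaluate(cols, n);  // children first
      for (int i = 0; i < n; i++) cols[out][i] = cols[left][i] + cols[right][i];
    };
  }

  public static void main(String[] args) {
    long[][] cols = new long[4][2];                 // 2 rows; cols 2 and 3 are scratch
    cols[0] = new long[]{14, 26};                   // input column 0 (months)
    VectorExpr expr = add(2, 2, 3, cast(0, 2));     // add(col 2, col 2) -> 3, child: cast(0) -> 2
    expr.evaluate(cols, 2);
    System.out.println(cols[3][0] + ", " + cols[3][1]);  // 28, 52
  }
}
```

Reusing scratch columns across sibling expressions (note how col 4 appears repeatedly above) keeps the batch footprint small.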
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
  dt,
  interval '1 2:3:4' day to second + interval '1 2:3:4' day to second,
@@ -227,7 +307,7 @@ select
  interval '1 2:3:4' day to second - interval_day_time(str2)
from vector_interval_1 order by dt
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
  dt,
  interval '1 2:3:4' day to second + interval '1 2:3:4' day to second,
@@ -238,6 +318,10 @@ select
  interval '1 2:3:4' day to second - interval_day_time(str2)
from vector_interval_1 order by dt
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -255,26 +339,62 @@ STAGE PLANS:
                TableScan
                  alias: vector_interval_1
                  Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1, 2, 3]
                  Select Operator
                    expressions: dt (type: date), (CAST( str2 AS INTERVAL DAY TO SECOND) + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (CAST( str2 AS INTERVAL DAY TO SECOND) - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time)
                    outputColumnNames: _col0, _col2, _col3, _col5, _col6
+                   Select Vectorization:
+                       className: VectorSelectOperator
+                       native: true
+                       projectedOutputColumns: [1, 6, 5, 8, 7]
+                       selectExpressions: IntervalDayTimeColAddIntervalDayTimeColumn(col 4, col 5)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time, CastStringToIntervalDayTime(col 3) -> 5:interval_day_time) -> 6:interval_day_time, IntervalDayTimeScalarAddIntervalDayTimeColumn(val 1 02:03:04.000000000, col 4)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time) -> 5:timestamp, IntervalDayTimeColSubtractIntervalDayTimeColumn(col 4, col 7)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time, CastStringToIntervalDayTime(col 3) -> 7:interval_day_time) -> 8:interval_day_time, IntervalDayTimeScalarSubtractIntervalDayTimeColumn(val 1 02:03:04.000000000, col 4)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time) -> 7:timestamp
                    Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col0 (type: date)
                      sort order: +
+                     Reduce Sink Vectorization:
+                         className: VectorReduceSinkOperator
+                         native: false
+                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                         nativeConditionsNotMet: Uniform Hash IS false
                      Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                      value expressions: _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type: interval_day_time)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: date), 2 04:06:08.000000000 (type: interval_day_time), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), 0 00:00:00.000000000 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0, 5, 1, 2, 6, 3, 4]
+                   selectExpressions: ConstantVectorExpression(val 2 04:06:08.000000000) -> 5:interval_day_time, ConstantVectorExpression(val 0 00:00:00.000000000) -> 6:interval_day_time
                Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -313,7 +433,7 @@ POSTHOOK: Input: default@vector_interval_1
#### A masked pattern was here ####
NULL	2 04:06:08.000000000	NULL	NULL	0 00:00:00.000000000	NULL	NULL
2001-01-01	2 04:06:08.000000000	2 04:06:08.000000000	2 04:06:08.000000000	0 00:00:00.000000000	0 00:00:00.000000000	0 00:00:00.000000000
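Editor's aside: day-time intervals are fixed-length time spans, so the doubling above — 1 02:03:04 + 1 02:03:04 = 2 04:06:08 — is plain arithmetic on total seconds (plus a nanos component that stays zero here). A quick check of that arithmetic; the formatting helper is hand-rolled for illustration:

```java
// Day-time interval arithmetic on total seconds: interval '1 2:3:4' is
// 93784 seconds, and doubling it gives 2 04:06:08 as in the results above.
public final class ToyDayTimeInterval {
  static long toSeconds(int days, int h, int m, int s) {
    return days * 86400L + h * 3600L + m * 60L + s;
  }

  static String format(long totalSeconds) {
    long days = totalSeconds / 86400, rest = totalSeconds % 86400;
    return String.format("%d %02d:%02d:%02d",
        days, rest / 3600, (rest % 3600) / 60, rest % 60);
  }

  public static void main(String[] args) {
    long oneTwoThreeFour = toSeconds(1, 2, 3, 4);                  // 93784
    System.out.println(format(oneTwoThreeFour * 2));               // 2 04:06:08
    System.out.println(format(oneTwoThreeFour - oneTwoThreeFour)); // 0 00:00:00
  }
}
```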
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
  dt,
  dt + interval '1-2' year to month,
@@ -330,7 +450,7 @@ select
  dt - interval_day_time(str2)
from vector_interval_1 order by dt
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
  dt,
  dt + interval '1-2' year to month,
@@ -347,6 +467,10 @@ select
  dt - interval_day_time(str2)
from vector_interval_1 order by dt
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -364,26 +488,61 @@ STAGE PLANS:
                TableScan
                  alias: vector_interval_1
                  Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1, 2, 3]
                  Select Operator
                    expressions: dt (type: date), (dt + 1-2) (type: date), (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (1-2 + dt) (type: date), (CAST( str1 AS INTERVAL YEAR TO MONTH) + dt) (type: date), (dt - 1-2) (type: date), (dt - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (dt + 1 02:03:04.000000000) (type: timestamp), (dt + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1 02:03:04.000000000 + dt) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO SECOND) + dt) (type: timestamp), (dt - 1 02:03:04.000000000) (type: timestamp), (dt - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+                   Select Vectorization:
+                       className: VectorSelectOperator
+                       native: true
+                       projectedOutputColumns: [1, 4, 6, 5, 8, 7, 10, 11, 13, 14, 15, 16, 17]
+                       selectExpressions: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 4:long, DateColAddIntervalYearMonthColumn(col 1, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:long, IntervalYearMonthScalarAddDateColumn(val 1-2, col 1) -> 5:long, IntervalYearMonthColAddDateColumn(col 7, col 1)(children: CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 8:long, DateColSubtractIntervalYearMonthScalar(col 1, val 1-2) -> 7:long, DateColSubtractIntervalYearMonthColumn(col 1, col 9)(children: CastStringToIntervalYearMonth(col 2) -> 9:interval_year_month) -> 10:long, DateColAddIntervalDayTimeScalar(col 1, val 1 02:03:04.000000000) -> 11:timestamp, DateColAddIntervalDayTimeColumn(col 1, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 13:timestamp, IntervalDayTimeScalarAddDateColumn(val 1 02:03:04.000000000, col 1) -> 14:timestamp, IntervalDayTimeColAddDateColumn(col 12, col 1)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 15:interval_day_time, DateColSubtractIntervalDayTimeScalar(col 1, val 1 02:03:04.000000000) -> 16:timestamp, DateColSubtractIntervalDayTimeColumn(col 1, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 17:timestamp
                    Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col0 (type: date)
                      sort order: +
+                     Reduce Sink Vectorization:
+                         className: VectorReduceSinkOperator
+                         native: false
+                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                         nativeConditionsNotMet: Uniform Hash IS false
                      Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                      value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: date), _col4 (type: date), _col5 (type: date), _col6 (type: date), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: date), VALUE._col2 (type: date), VALUE._col3 (type: date), VALUE._col4 (type: date), VALUE._col5 (type: date), VALUE._col6 (type: timestamp), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
                Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -434,7 +593,7 @@ POSTHOOK: Input: default@vector_interval_1
#### A masked pattern was here ####
NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL
2001-01-01	2002-03-01	2002-03-01	2002-03-01	2002-03-01	1999-11-01	1999-11-01	2001-01-02 02:03:04	2001-01-02 02:03:04	2001-01-02 02:03:04	2001-01-02 02:03:04	2000-12-30 21:56:56	2000-12-30 21:56:56
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
  ts,
  ts + interval '1-2' year to month,
@@ -451,7 +610,7 @@ select
  ts - interval_day_time(str2)
from vector_interval_1 order by ts
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
  ts,
  ts + interval '1-2' year to month,
@@ -468,6 +627,10 @@ select
  ts - interval_day_time(str2)
from vector_interval_1 order by ts
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -485,26 +648,61 @@ STAGE PLANS:
                TableScan
                  alias: vector_interval_1
                  Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1, 2, 3]
                  Select Operator
                    expressions: ts (type: timestamp), (ts + 1-2) (type: timestamp), (ts + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: timestamp), (1-2 + ts) (type: timestamp), (CAST( str1 AS INTERVAL YEAR TO MONTH) + ts) (type: timestamp), (ts - 1-2) (type: timestamp), (ts - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: timestamp), (ts + 1 02:03:04.000000000) (type: timestamp), (ts + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1 02:03:04.000000000 + ts) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO SECOND) + ts) (type: timestamp), (ts - 1 02:03:04.000000000) (type: timestamp), (ts - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+                   Select Vectorization:
+                       className: VectorSelectOperator
+                       native: true
+                       projectedOutputColumns: [0, 4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]
+                       selectExpressions: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 4:timestamp, TimestampColAddIntervalYearMonthColumn(col 0, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:timestamp, IntervalYearMonthScalarAddTimestampColumn(val 1-2, col 0) -> 7:timestamp, IntervalYearMonthColAddTimestampColumn(col 5, col 0)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 8:timestamp, TimestampColSubtractIntervalYearMonthScalar(col 0, val 1-2) -> 9:timestamp, TimestampColSubtractIntervalYearMonthColumn(col 0, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 10:timestamp, TimestampColAddIntervalDayTimeScalar(col 0, val 1 02:03:04.000000000) -> 11:timestamp, TimestampColAddIntervalDayTimeColumn(col 0, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 13:timestamp, IntervalDayTimeScalarAddTimestampColumn(val 1 02:03:04.000000000, col 0) -> 14:timestamp, IntervalDayTimeColAddTimestampColumn(col 12, col 0)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 15:timestamp, TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 02:03:04.000000000) -> 16:timestamp, TimestampColSubtractIntervalDayTimeColumn(col 0, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 17:timestamp
                    Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col0 (type: timestamp)
                      sort order: +
+                     Reduce Sink Vectorization:
+                         className: VectorReduceSinkOperator
+                         native: false
+                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                         nativeConditionsNotMet: Uniform Hash IS false
                      Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                      value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
                Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -555,7 +753,7 @@ POSTHOOK: Input: default@vector_interval_1
#### A masked pattern was here ####
NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL
2001-01-01 01:02:03	2002-03-01 01:02:03	2002-03-01 01:02:03	2002-03-01 01:02:03	2002-03-01 01:02:03	1999-11-01 01:02:03	1999-11-01 01:02:03	2001-01-02 03:05:07	2001-01-02 03:05:07	2001-01-02 03:05:07	2001-01-02 03:05:07	2000-12-30 22:58:59	2000-12-30 22:58:59
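Editor's aside: adding a year-month interval is calendar arithmetic on months rather than a fixed number of seconds, which is why 2001-01-01 01:02:03 + 1-2 lands on 2002-03-01 01:02:03 in the results above. The equivalent java.time check:

```java
import java.time.LocalDateTime;

// Year-month interval addition is month arithmetic on the calendar:
// interval '1-2' is 14 months, so 2001-01-01 01:02:03 moves to
// 2002-03-01 01:02:03 (and back to 1999-11-01 01:02:03 on subtraction),
// matching the query results above.
public final class ToyTimestampPlusYearMonth {
  public static void main(String[] args) {
    LocalDateTime ts = LocalDateTime.of(2001, 1, 1, 1, 2, 3);
    System.out.println(ts.plusMonths(14));   // 2002-03-01T01:02:03
    System.out.println(ts.minusMonths(14));  // 1999-11-01T01:02:03
  }
}
```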
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
  ts,
  ts - ts,
@@ -563,7 +761,7 @@ select
  ts - timestamp '2001-01-01 01:02:03'
from vector_interval_1 order by ts
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
  ts,
  ts - ts,
@@ -571,6 +769,10 @@ select
  ts - timestamp '2001-01-01 01:02:03'
from vector_interval_1 order by ts
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -588,26 +790,61 @@ STAGE PLANS:
                TableScan
                  alias: vector_interval_1
                  Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1, 2, 3]
                  Select Operator
                    expressions: ts (type: timestamp), (ts - ts) (type: interval_day_time), (2001-01-01 01:02:03.0 - ts) (type: interval_day_time), (ts - 2001-01-01 01:02:03.0) (type: interval_day_time)
                    outputColumnNames: _col0, _col1, _col2, _col3
+                   Select Vectorization:
+                       className: VectorSelectOperator
+                       native: true
+                       projectedOutputColumns: [0, 4, 5, 6]
+                       selectExpressions: TimestampColSubtractTimestampColumn(col 0, col 0) -> 4:interval_day_time, TimestampScalarSubtractTimestampColumn(val 2001-01-01 01:02:03.0, col 0) -> 5:timestamp, TimestampColSubtractTimestampScalar(col 0, val 2001-01-01 01:02:03.0) -> 6:interval_day_time
                    Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col0 (type: timestamp)
                      sort order: +
+                     Reduce Sink Vectorization:
+                         className: VectorReduceSinkOperator
+                         native: false
+                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                         nativeConditionsNotMet: Uniform Hash IS false
                      Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                      value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time)
                outputColumnNames: _col0, _col1, _col2, _col3
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0, 1, 2, 3]
                Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -640,7 +877,7 @@ POSTHOOK: Input: default@vector_interval_1
#### A masked pattern was here ####
NULL	NULL	NULL	NULL
2001-01-01 01:02:03	0 00:00:00.000000000	0 00:00:00.000000000	0 00:00:00.000000000
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
  dt,
  dt - dt,
@@ -648,7 +885,7 @@ select
  dt - date '2001-01-01'
from vector_interval_1 order by dt
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
  dt,
  dt - dt,
@@ -656,6 +893,10 @@ select
  dt - date '2001-01-01'
from vector_interval_1 order by dt
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -673,26 +914,61 @@ STAGE PLANS:
                TableScan
                  alias: vector_interval_1
                  Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1, 2, 3]
                  Select Operator
                    expressions: dt (type: date), (dt - dt) (type: interval_day_time), (2001-01-01 - dt) (type: interval_day_time), (dt - 2001-01-01) (type: interval_day_time)
                    outputColumnNames: _col0, _col1, _col2, _col3
+                   Select Vectorization:
+                       className: VectorSelectOperator
+                       native: true
+                       projectedOutputColumns: [1, 4, 5, 6]
+                       selectExpressions: DateColSubtractDateColumn(col 1, col 1) -> 4:timestamp, DateScalarSubtractDateColumn(val 2001-01-01 00:00:00.0, col 1) -> 5:timestamp, DateColSubtractDateScalar(col 1, val 2001-01-01 00:00:00.0) -> 6:timestamp
                    Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col0 (type: date)
                      sort order: +
+                     Reduce Sink Vectorization:
+                         className: VectorReduceSinkOperator
+                         native: false
+                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                         nativeConditionsNotMet: Uniform Hash IS false
                      Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                      value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time)
                outputColumnNames: _col0, _col1, _col2, _col3
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0, 1, 2, 3]
                Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -725,7 +1001,7 @@ POSTHOOK: Input: default@vector_interval_1
#### A masked pattern was here ####
NULL	NULL	NULL	NULL
2001-01-01	0 00:00:00.000000000	0 00:00:00.000000000	0 00:00:00.000000000
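Editor's aside: subtracting two dates or timestamps yields a day-time interval, which is why dt - dt prints 0 00:00:00.000000000 above; in the mixed date/timestamp query that follows, the date operand is treated as midnight on the timeline before the difference is taken. The same arithmetic via java.time, with a hand-rolled Hive-style formatter for illustration:

```java
import java.time.Duration;
import java.time.LocalDateTime;

// Timestamp subtraction produces a day-time interval; ts - ts is zero, and a
// date operand behaves like midnight of that day, as in the plans above.
public final class ToyTimestampDifference {
  static String asDayTimeInterval(Duration d) {
    long s = d.getSeconds(), days = s / 86400, rest = s % 86400;
    return String.format("%d %02d:%02d:%02d.%09d",
        days, rest / 3600, (rest % 3600) / 60, rest % 60, d.getNano());
  }

  public static void main(String[] args) {
    LocalDateTime ts = LocalDateTime.of(2001, 1, 1, 1, 2, 3);
    LocalDateTime midnight = LocalDateTime.of(2001, 1, 1, 0, 0, 0); // date '2001-01-01'
    System.out.println(asDayTimeInterval(Duration.between(ts, ts)));       // 0 00:00:00.000000000
    System.out.println(asDayTimeInterval(Duration.between(midnight, ts))); // 0 01:02:03.000000000
  }
}
```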
order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col4 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type: interval_day_time) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time), VALUE._col4 (type: interval_day_time), VALUE._col5 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_interval_2.q.out ql/src/test/results/clientpositive/llap/vector_interval_2.q.out index d84737c..d14217c 100644 --- ql/src/test/results/clientpositive/llap/vector_interval_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_interval_2.q.out @@ -42,7 +42,7 @@ POSTHOOK: Lineage: vector_interval_2.str2 EXPRESSION [] POSTHOOK: Lineage: vector_interval_2.str3 EXPRESSION [] POSTHOOK: Lineage: vector_interval_2.str4 EXPRESSION [] POSTHOOK: Lineage: vector_interval_2.ts EXPRESSION [] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select str1, -- Should all be true @@ -74,7 +74,7 @@ select interval '1-2' year to month != interval_year_month(str2) from vector_interval_2 order by str1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select str1, -- Should all be true @@ -106,6 +106,10 @@ select interval '1-2' year to month != interval_year_month(str2) from vector_interval_2 order by str1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -123,26 +127,61 @@ STAGE PLANS: TableScan alias: vector_interval_2 Statistics: Num 
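Aside: the annotations above print each vectorized expression with its operand columns and output column/type, e.g. "DateColSubtractDateColumn(col 1, col 1) -> 4:timestamp". A minimal sketch of how that annotation string can be assembled; class and method names here are hypothetical, not Hive's:

    // Sketch only: reproduces the "Name(col a, col b) -> n:type" format seen
    // in the plans above. Not the actual Hive implementation.
    public final class PlanAnnotation {
      private PlanAnnotation() {}

      public static String render(String className, int[] inputCols,
          int outputCol, String outputType) {
        StringBuilder sb = new StringBuilder(className).append("(");
        for (int i = 0; i < inputCols.length; i++) {
          if (i > 0) {
            sb.append(", ");
          }
          sb.append("col ").append(inputCols[i]);
        }
        return sb.append(") -> ").append(outputCol).append(":")
            .append(outputType).toString();
      }

      public static void main(String[] args) {
        // Prints: DateColSubtractDateColumn(col 1, col 1) -> 4:timestamp
        System.out.println(render("DateColSubtractDateColumn",
            new int[] {1, 1}, 4, "timestamp"));
      }
    }
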
diff --git ql/src/test/results/clientpositive/llap/vector_interval_2.q.out ql/src/test/results/clientpositive/llap/vector_interval_2.q.out
index d84737c..d14217c 100644
--- ql/src/test/results/clientpositive/llap/vector_interval_2.q.out
+++ ql/src/test/results/clientpositive/llap/vector_interval_2.q.out
@@ -42,7 +42,7 @@ POSTHOOK: Lineage: vector_interval_2.str2 EXPRESSION []
POSTHOOK: Lineage: vector_interval_2.str3 EXPRESSION []
POSTHOOK: Lineage: vector_interval_2.str4 EXPRESSION []
POSTHOOK: Lineage: vector_interval_2.ts EXPRESSION []
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
  str1,
  -- Should all be true
@@ -74,7 +74,7 @@ select
  interval '1-2' year to month != interval_year_month(str2)
from vector_interval_2 order by str1
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
  str1,
  -- Should all be true
@@ -106,6 +110,10 @@ select
  interval '1-2' year to month != interval_year_month(str2)
from vector_interval_2 order by str1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -123,26 +127,61 @@ STAGE PLANS:
                TableScan
                  alias: vector_interval_2
                  Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5]
                  Select Operator
                    expressions: str1 (type: string), (CAST( str1 AS INTERVAL YEAR TO MONTH) = CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) < CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) > CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) = 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) < 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) >= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) > 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-3) (type: boolean), (1-2 = CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 < CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 > CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <> CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [2, 8, 9, 10, 11, 12, 13, 14, 15, 7, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
+                        selectExpressions: LongColEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 8:long, LongColLessEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 9:long, LongColLessEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> 10:long, LongColLessLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> 11:long, LongColGreaterEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 12:long, LongColGreaterEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 13:long, LongColGreaterLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 14:long, LongColNotEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> 15:long, IntervalYearMonthColEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long, IntervalYearMonthColLessEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 16:long, IntervalYearMonthColLessEqualIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 17:long, IntervalYearMonthColLessIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 18:long, IntervalYearMonthColGreaterEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 19:long, IntervalYearMonthColGreaterEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 20:long, IntervalYearMonthColGreaterIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 21:long, IntervalYearMonthColNotEqualIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 22:long, IntervalYearMonthScalarEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 23:long, IntervalYearMonthScalarLessEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 24:long, IntervalYearMonthScalarLessEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 25:long, IntervalYearMonthScalarLessIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 26:long, IntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 27:long, IntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn(val 15, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 28:long, IntervalYearMonthScalarGreaterIntervalYearMonthColumn(val 15, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 29:long, IntervalYearMonthScalarNotEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 30:long
                    Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col0 (type: string)
                      sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                      Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                      value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean), _col24 (type: boolean)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col15 (type: boolean), VALUE._col16 (type: boolean), VALUE._col17 (type: boolean), VALUE._col18 (type: boolean), VALUE._col19 (type: boolean), VALUE._col20 (type: boolean), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean), VALUE._col23 (type: boolean)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
                Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -223,7 +262,7 @@ POSTHOOK: Input: default@vector_interval_2
#### A masked pattern was here ####
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
1-2 true true true true true true true true true true true true true true true true true true true true true true true true
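Aside: in the scalar comparisons above, interval '1-2' year to month is printed as "val 14" and interval '1-3' as "val 15" — a year-month interval is carried as a single total-month count. A tiny sketch of that arithmetic:

    // Sketch: the month-count encoding behind "val 14" / "val 15" above.
    public final class YearMonthIntervalDemo {
      static int totalMonths(int years, int months) {
        return years * 12 + months;
      }

      public static void main(String[] args) {
        System.out.println(totalMonths(1, 2)); // 14, matches "val 14"
        System.out.println(totalMonths(1, 3)); // 15, matches "val 15"
      }
    }
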
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
  str1,
  -- Should all be false
@@ -249,7 +288,7 @@ select
  interval '1-2' year to month != interval_year_month(str1)
from vector_interval_2 order by str1
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
  str1,
  -- Should all be false
@@ -275,6 +314,10 @@ select
  interval '1-2' year to month != interval_year_month(str1)
from vector_interval_2 order by str1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -292,26 +335,61 @@ STAGE PLANS:
                TableScan
                  alias: vector_interval_2
                  Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5]
                  Select Operator
                    expressions: str1 (type: string), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) <= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) < 1-2) (type: boolean), (1-2 <> CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 >= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 > CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 < CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) > CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) < CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) > 1-3) (type: boolean)
                    outputColumnNames: _col0, _col1, _col10, _col11, _col13, _col14, _col15, _col16, _col17, _col2, _col3, _col4, _col5, _col7, _col8, _col9
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [2, 8, 7, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 15, 20, 21]
+                        selectExpressions: LongColNotEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 8:long, IntervalYearMonthColLessEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 7:long, IntervalYearMonthColLessIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 9:long, IntervalYearMonthScalarNotEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 10:long, IntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 11:long, IntervalYearMonthScalarGreaterIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 12:long, IntervalYearMonthScalarLessEqualIntervalYearMonthColumn(val 15, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 13:long, IntervalYearMonthScalarLessIntervalYearMonthColumn(val 15, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 14:long, LongColGreaterEqualLongColumn(col 6, col 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 15:interval_year_month) -> 16:long, LongColGreaterLongColumn(col 6, col 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 15:interval_year_month) -> 17:long, LongColLessEqualLongColumn(col 6, col 15)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 15:interval_year_month) -> 18:long, LongColLessLongColumn(col 6, col 15)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 15:interval_year_month) -> 19:long, IntervalYearMonthColNotEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 15:long, IntervalYearMonthColGreaterEqualIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 20:long, IntervalYearMonthColGreaterIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 21:long
                    Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col0 (type: string)
                      sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                      Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                      value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col0 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col5 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col10 (type: boolean)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 1, 6, 7, 8, 9, 10, 6, 11, 12, 13, 14, 15, 11]
                Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -380,7 +458,7 @@ POSTHOOK: Input: default@vector_interval_2
#### A masked pattern was here ####
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
1-2 false false false false false false false false false false false false false false false false false false
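Aside: the casts above land the year-month intervals in long columns (CastStringToIntervalYearMonth -> N:interval_year_month), so the comparisons compile to the generic LongCol*LongColumn kernels that write 0/1 longs. A simplified sketch of such a kernel; the real Hive expressions additionally handle nulls, isRepeating vectors and the batch's selection vector:

    // Simplified sketch of a long-vs-long equality kernel like the
    // LongColEqualLongColumn entries above: compare two long vectors and
    // write 0/1 longs into the output vector.
    static void equalLongColumns(long[] left, long[] right, long[] out, int n) {
      for (int i = 0; i < n; i++) {
        out[i] = (left[i] == right[i]) ? 1 : 0;
      }
    }
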
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
  str3,
  -- Should all be true
@@ -412,7 +490,7 @@ select
  interval '1 2:3:4' day to second != interval_day_time(str4)
from vector_interval_2 order by str3
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
  str3,
  -- Should all be true
@@ -444,6 +522,10 @@ select
  interval '1 2:3:4' day to second != interval_day_time(str4)
from vector_interval_2 order by str3
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -461,26 +543,61 @@ STAGE PLANS:
                TableScan
                  alias: vector_interval_2
                  Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5]
                  Select Operator
                    expressions: str3 (type: string), (CAST( str3 AS INTERVAL DAY TO SECOND) = CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) < CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) > CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <> CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) = 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) < 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) > 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <> 1 02:03:05.000000000) (type: boolean), (1 02:03:04.000000000 = CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 < CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 > CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <> CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
+                        selectExpressions: IntervalDayTimeColEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 8:long, IntervalDayTimeColLessEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 9:long, IntervalDayTimeColLessEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> 10:long, IntervalDayTimeColLessIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> 11:long, IntervalDayTimeColGreaterEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 12:long, IntervalDayTimeColGreaterEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 13:long, IntervalDayTimeColGreaterIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 14:long, IntervalDayTimeColNotEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> 15:long, IntervalDayTimeColEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 16:long, IntervalDayTimeColLessEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 17:long, IntervalDayTimeColLessEqualIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 18:long, IntervalDayTimeColLessIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 19:long, IntervalDayTimeColGreaterEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 20:long, IntervalDayTimeColGreaterEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 21:long, IntervalDayTimeColGreaterIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 22:long, IntervalDayTimeColNotEqualIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 23:long, IntervalDayTimeScalarEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 24:long, IntervalDayTimeScalarLessEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 25:long, IntervalDayTimeScalarLessEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 26:long, IntervalDayTimeScalarLessIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 27:long, IntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 28:long, IntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn(val 1 02:03:05.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 29:long, IntervalDayTimeScalarGreaterIntervalDayTimeColumn(val 1 02:03:05.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 30:long, IntervalDayTimeScalarNotEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 31:long
                    Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col0 (type: string)
                      sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                      Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                      value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean), _col24 (type: boolean)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col15 (type: boolean), VALUE._col16 (type: boolean), VALUE._col17 (type: boolean), VALUE._col18 (type: boolean), VALUE._col19 (type: boolean), VALUE._col20 (type: boolean), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean), VALUE._col23 (type: boolean)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
                Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -561,7 +678,7 @@ POSTHOOK: Input: default@vector_interval_2
#### A masked pattern was here ####
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
1 2:3:4 true true true true true true true true true true true true true true true true true true true true true true true true
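Aside: the day-time scalar "val 1 02:03:04.000000000" above spells days, hours:minutes:seconds and a nanosecond field. Assuming the value folds down to a total-seconds count plus a nanosecond remainder (an assumption about the internal layout, not confirmed by this patch), the arithmetic looks like:

    // Sketch (assumed representation): fold "1 02:03:04.000000000" into
    // total seconds plus nanoseconds.
    public final class DayTimeIntervalDemo {
      public static void main(String[] args) {
        long totalSeconds = 1L * 86400 + 2L * 3600 + 3L * 60 + 4; // 93784
        int nanos = 0;
        System.out.println(totalSeconds + "s " + nanos + "ns");
      }
    }
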
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
  str3,
  -- Should all be false
@@ -587,7 +704,7 @@ select
  interval '1 2:3:4' day to second != interval_day_time(str3)
from vector_interval_2 order by str3
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
  str3,
  -- Should all be false
@@ -613,6 +730,10 @@ select
  interval '1 2:3:4' day to second != interval_day_time(str3)
from vector_interval_2 order by str3
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -630,26 +751,61 @@ STAGE PLANS:
                TableScan
                  alias: vector_interval_2
                  Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5]
                  Select Operator
                    expressions: str3 (type: string), (CAST( str3 AS INTERVAL DAY TO SECOND) <> CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) <= 1 02:03:04.000000000) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) < 1 02:03:04.000000000) (type: boolean), (1 02:03:04.000000000 <> CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 >= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 > CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 < CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) > CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) < CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <> 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) > 1 02:03:05.000000000) (type: boolean)
                    outputColumnNames: _col0, _col1, _col10, _col11, _col13, _col14, _col15, _col16, _col17, _col2, _col3, _col4, _col5, _col7, _col8, _col9
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
+                        selectExpressions: IntervalDayTimeColNotEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 8:long, IntervalDayTimeColLessEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 9:long, IntervalDayTimeColLessIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 10:long, IntervalDayTimeScalarNotEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 11:long, IntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 12:long, IntervalDayTimeScalarGreaterIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 13:long, IntervalDayTimeScalarLessEqualIntervalDayTimeColumn(val 1 02:03:05.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 14:long, IntervalDayTimeScalarLessIntervalDayTimeColumn(val 1 02:03:05.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 15:long, IntervalDayTimeColGreaterEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> 16:long, IntervalDayTimeColGreaterIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> 17:long, IntervalDayTimeColLessEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 18:long, IntervalDayTimeColLessIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 19:long, IntervalDayTimeColNotEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 20:long, IntervalDayTimeColGreaterEqualIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 21:long, IntervalDayTimeColGreaterIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 22:long
                    Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col0 (type: string)
                      sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                      Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                      value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col0 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col5 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col10 (type: boolean)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 1, 6, 7, 8, 9, 10, 6, 11, 12, 13, 14, 15, 11]
                Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -718,7 +874,7 @@ POSTHOOK: Input: default@vector_interval_2
#### A masked pattern was here ####
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
1 2:3:4 false false false false false false false false false false false false false false false false false false
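Aside: the WHERE clauses in the next plans compile to a single FilterExprAndExpr whose child filters each shrink the batch's selected-row set, so applying the children in sequence yields the conjunction. A simplified sketch of that idea over a plain selection vector; the real operators work on VectorizedRowBatch:

    // Simplified sketch of AND-ing vectorized filters: each child keeps only
    // the rows that pass, compacting the selection vector in place.
    interface RowPredicate {
      boolean test(int row);
    }

    static int applyAnd(RowPredicate[] children, int[] selected, int size) {
      for (RowPredicate child : children) {
        int newSize = 0;
        for (int i = 0; i < size; i++) {
          int row = selected[i];
          if (child.test(row)) {
            selected[newSize++] = row;
          }
        }
        size = newSize;
      }
      return size;
    }
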
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select ts from vector_interval_2
where interval_year_month(str1) = interval_year_month(str1)
@@ -743,7 +899,7 @@ where
  and interval '1-3' year to month > interval_year_month(str1)
order by ts
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select ts from vector_interval_2
where interval_year_month(str1) = interval_year_month(str1)
@@ -768,6 +924,10 @@ where
  and interval '1-3' year to month > interval_year_month(str1)
order by ts
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -785,28 +945,66 @@ STAGE PLANS:
                TableScan
                  alias: vector_interval_2
                  Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> boolean, FilterLongColNotEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> boolean, FilterLongColLessEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> boolean, FilterLongColLessLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> boolean, FilterLongColGreaterEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> boolean, FilterLongColGreaterLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> boolean, FilterIntervalYearMonthColEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthColNotEqualIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthColLessEqualIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthColLessIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthColGreaterEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthColGreaterIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthScalarEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthScalarNotEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthScalarLessEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthScalarLessIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn(val 15, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthScalarGreaterIntervalYearMonthColumn(val 15, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean) -> boolean
                    predicate: ((CAST( str1 AS INTERVAL YEAR TO MONTH) = CAST( str1 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <> CAST( str2 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str2 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) < CAST( str2 AS INTERVAL YEAR TO MONTH)) and (CAST( str2 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) and (CAST( str2 AS INTERVAL YEAR TO MONTH) > CAST( str1 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) = 1-2) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-3) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-3) and (CAST( str1 AS INTERVAL YEAR TO MONTH) < 1-3) and (CAST( str2 AS INTERVAL YEAR TO MONTH) >= 1-2) and (CAST( str2 AS INTERVAL YEAR TO MONTH) > 1-2) and (1-2 = CAST( str1 AS INTERVAL YEAR TO MONTH)) and (1-2 <> CAST( str2 AS INTERVAL YEAR TO MONTH)) and (1-2 <= CAST( str2 AS INTERVAL YEAR TO MONTH)) and (1-2 < CAST( str2 AS INTERVAL YEAR TO MONTH)) and (1-3 >= CAST( str1 AS INTERVAL YEAR TO MONTH)) and (1-3 > CAST( str1 AS INTERVAL YEAR TO MONTH))) (type: boolean)
                    Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: ts (type: timestamp)
                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0]
                      Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: timestamp)
                        sort order: +
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: timestamp)
                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -872,7 +1070,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@vector_interval_2
#### A masked pattern was here ####
2001-01-01 01:02:03
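Aside: every plan in this file comes from the new "explain vectorization expression" form, and the PLAN VECTORIZATION header shows it is gated on hive.vectorized.execution.enabled. A hedged JDBC sketch of running one of these explains; the URL is a placeholder and the query mirrors the PREHOOK statements above:

    // Sketch: issuing an "explain vectorization expression" over JDBC.
    // The connection URL is a placeholder for a local HiveServer2.
    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class ExplainVectorizationDemo {
      public static void main(String[] args) throws Exception {
        try (Connection con = DriverManager.getConnection(
                "jdbc:hive2://localhost:10000/default");
             Statement st = con.createStatement()) {
          st.execute("set hive.vectorized.execution.enabled=true");
          try (ResultSet rs = st.executeQuery(
              "explain vectorization expression "
                  + "select ts from vector_interval_2 order by ts")) {
            while (rs.next()) {
              System.out.println(rs.getString(1));
            }
          }
        }
      }
    }
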
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select ts from vector_interval_2
where interval_day_time(str3) = interval_day_time(str3)
@@ -897,7 +1095,7 @@ where
  and interval '1 2:3:5' day to second > interval_day_time(str3)
order by ts
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select ts from vector_interval_2
where interval_day_time(str3) = interval_day_time(str3)
@@ -922,6 +1120,10 @@ where
  and interval '1 2:3:5' day to second > interval_day_time(str3)
order by ts
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -939,28 +1141,66 @@ STAGE PLANS:
                TableScan
                  alias: vector_interval_2
                  Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterIntervalDayTimeColEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> boolean, FilterIntervalDayTimeColNotEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> boolean, FilterIntervalDayTimeColLessEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> boolean, FilterIntervalDayTimeColLessIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> boolean, FilterIntervalDayTimeColGreaterEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> boolean, FilterIntervalDayTimeColGreaterIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> boolean, FilterIntervalDayTimeColEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeColNotEqualIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeColLessEqualIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeColLessIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeColGreaterEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeColGreaterIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeScalarEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeScalarNotEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeScalarLessEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeScalarLessIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn(val 1 02:03:05.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeScalarGreaterIntervalDayTimeColumn(val 1 02:03:05.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean) -> boolean
                    predicate: ((CAST( str3 AS INTERVAL DAY TO SECOND) = CAST( str3 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) <> CAST( str4 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str4 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) < CAST( str4 AS INTERVAL DAY TO SECOND)) and (CAST( str4 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND)) and (CAST( str4 AS INTERVAL DAY TO SECOND) > CAST( str3 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) = 1 02:03:04.000000000) and (CAST( str3 AS INTERVAL DAY TO SECOND) <> 1 02:03:05.000000000) and (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:05.000000000) and (CAST( str3 AS INTERVAL DAY TO SECOND) < 1 02:03:05.000000000) and (CAST( str4 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000) and (CAST( str4 AS INTERVAL DAY TO SECOND) > 1 02:03:04.000000000) and (1 02:03:04.000000000 = CAST( str3 AS INTERVAL DAY TO SECOND)) and (1 02:03:04.000000000 <> CAST( str4 AS INTERVAL DAY TO SECOND)) and (1 02:03:04.000000000 <= CAST( str4 AS INTERVAL DAY TO SECOND)) and (1 02:03:04.000000000 < CAST( str4 AS INTERVAL DAY TO SECOND)) and (1 02:03:05.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND)) and (1 02:03:05.000000000 > CAST( str3 AS INTERVAL DAY TO SECOND))) (type: boolean)
                    Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: ts (type: timestamp)
                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0]
                      Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: timestamp)
                        sort order: +
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: timestamp)
                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1026,7 +1266,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@vector_interval_2
#### A masked pattern was here ####
2001-01-01 01:02:03
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select ts from vector_interval_2
where date '2002-03-01' = dt + interval_year_month(str1)
@@ -1046,7 +1286,7 @@ where
  and dt != dt + interval '1-2' year to month
order by ts
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select ts from vector_interval_2
where date '2002-03-01' = dt + interval_year_month(str1)
@@ -1066,6 +1306,10 @@ where
  and dt != dt + interval '1-2' year to month
order by ts
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -1083,28 +1327,66 @@ STAGE PLANS:
                TableScan
                  alias: vector_interval_2
                  Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterDateScalarEqualDateColumn(val 11747, col 7)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterDateScalarLessEqualDateColumn(val 11747, col 7)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterDateScalarGreaterEqualDateColumn(val 11747, col 7)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterDateColEqualDateScalar(col 7, val 11747)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterDateColLessEqualDateScalar(col 7, val 11747)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterDateColGreaterEqualDateScalar(col 7, val 11747)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterLongColNotEqualLongColumn(col 1, col 7)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterDateScalarEqualDateColumn(val 11747, col 6)(children: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 6:long) -> boolean, FilterDateScalarLessEqualDateColumn(val 11747, col 6)(children: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 6:long) -> boolean, FilterDateScalarGreaterEqualDateColumn(val 11747, col 6)(children: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 6:long) -> boolean, FilterDateColEqualDateScalar(col 6, val 11747)(children: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 6:long) -> boolean, FilterDateColLessEqualDateScalar(col 6, val 11747)(children: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 6:long) -> boolean, FilterDateColGreaterEqualDateScalar(col 6, val 11747)(children: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 6:long) -> boolean, FilterLongColNotEqualLongColumn(col 1, col 6)(children: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 6:long) -> boolean) -> boolean
                    predicate: ((2002-03-01 = (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (2002-03-01 <= (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (2002-03-01 >= (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) = 2002-03-01) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) <= 2002-03-01) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) >= 2002-03-01) and (dt <> (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (2002-03-01 = (dt + 1-2)) and (2002-03-01 <= (dt + 1-2)) and (2002-03-01 >= (dt + 1-2)) and ((dt + 1-2) = 2002-03-01) and ((dt + 1-2) <= 2002-03-01) and ((dt + 1-2) >= 2002-03-01) and (dt <> (dt + 1-2))) (type: boolean)
                    Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: ts (type: timestamp)
                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0]
                      Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: timestamp)
                        sort order: +
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: timestamp)
                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1160,7 +1442,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@vector_interval_2
#### A masked pattern was here ####
2001-01-01 01:02:03
 POSTHOOK: Input: default@vector_interval_2
 #### A masked pattern was here ####
 2001-01-01 01:02:03
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select ts
 from vector_interval_2
 where timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month
@@ -1185,7 +1467,7 @@ where
   and ts > ts - interval '1' year
 order by ts
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select ts
 from vector_interval_2
 where timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month
@@ -1210,6 +1492,10 @@ where
   and ts > ts - interval '1' year
 order by ts
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1227,28 +1513,66 @@ STAGE PLANS:
                 TableScan
                   alias: vector_interval_2
                   Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterTimestampScalarEqualTimestampColumn(val 2002-03-01 01:02:03.0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampScalarLessEqualTimestampColumn(val 2002-03-01 01:02:03.0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampScalarGreaterEqualTimestampColumn(val 2002-03-01 01:02:03.0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampScalarNotEqualTimestampColumn(val 2002-04-01 01:02:03.0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampScalarLessTimestampColumn(val 2002-02-01 01:02:03.0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampScalarGreaterTimestampColumn(val 2002-04-01 01:02:03.0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColEqualTimestampScalar(col 6, val 2002-03-01 01:02:03.0)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColGreaterEqualTimestampScalar(col 6, val 2002-03-01 01:02:03.0)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColLessEqualTimestampScalar(col 6, val 2002-03-01 01:02:03.0)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColNotEqualTimestampScalar(col 6, val 2002-04-01 01:02:03.0)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColGreaterTimestampScalar(col 6, val 2002-02-01 01:02:03.0)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColLessTimestampScalar(col 6, val 2002-04-01 01:02:03.0)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColEqualTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 0-0) -> 6:timestamp) -> boolean, FilterTimestampColNotEqualTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-0) -> 6:timestamp) -> boolean, FilterTimestampColLessEqualTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-0) -> 6:timestamp) -> boolean, FilterTimestampColLessTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-0) -> 6:timestamp) -> boolean, FilterTimestampColGreaterEqualTimestampColumn(col 0, col 6)(children: TimestampColSubtractIntervalYearMonthScalar(col 0, val 1-0) -> 6:timestamp) -> boolean, FilterTimestampColGreaterTimestampColumn(col 0, col 6)(children: TimestampColSubtractIntervalYearMonthScalar(col 0, val 1-0) -> 6:timestamp) -> boolean) -> boolean
                     predicate: ((2002-03-01 01:02:03.0 = (ts + 1-2)) and (2002-03-01 01:02:03.0 <= (ts + 1-2)) and (2002-03-01 01:02:03.0 >= (ts + 1-2)) and (2002-04-01 01:02:03.0 <> (ts + 1-2)) and (2002-02-01 01:02:03.0 < (ts + 1-2)) and (2002-04-01 01:02:03.0 > (ts + 1-2)) and ((ts + 1-2) = 2002-03-01 01:02:03.0) and ((ts + 1-2) >= 2002-03-01 01:02:03.0) and ((ts + 1-2) <= 2002-03-01 01:02:03.0) and ((ts + 1-2) <> 2002-04-01 01:02:03.0) and ((ts + 1-2) > 2002-02-01 01:02:03.0) and ((ts + 1-2) < 2002-04-01 01:02:03.0) and (ts = (ts + 0-0)) and (ts <> (ts + 1-0)) and (ts <= (ts + 1-0)) and (ts < (ts + 1-0)) and (ts >= (ts - 1-0)) and (ts > (ts - 1-0))) (type: boolean)
                     Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ts (type: timestamp)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0]
                       Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: timestamp)
                         sort order: +
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: timestamp)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                 Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1314,7 +1638,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@vector_interval_2
 #### A masked pattern was here ####
 2001-01-01 01:02:03
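Note on reading the hunk above: each (timestamp, interval) comparison in the WHERE clause is rewritten over a scratch column (col 6 here) that holds the materialized ts + interval value, and the comparison itself is bound to one generated class per conjunct (FilterTimestampScalarEqualTimestampColumn, FilterTimestampColGreaterTimestampColumn, and so on), with the children clause naming the interval arithmetic that feeds it. As a minimal sketch of how this output is produced, assuming the vector_interval_2 test table and an LLAP/Tez setup like the one these golden files came from:

    set hive.vectorized.execution.enabled=true;
    explain vectorization expression
    select ts
    from vector_interval_2
    where timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month
      and ts > ts - interval '1' year
    order by ts;

The predicateExpression line in the Filter Vectorization block then shows exactly which vectorized filter class each conjunct compiled to.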
-PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ts from vector_interval_2 where timestamp '2001-01-01 01:02:03' = dt + interval '0 1:2:3' day to second @@ -1339,7 +1663,7 @@ where and ts > dt - interval '0 1:2:4' day to second order by ts PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ts from vector_interval_2 where timestamp '2001-01-01 01:02:03' = dt + interval '0 1:2:3' day to second @@ -1364,6 +1688,10 @@ where and ts > dt - interval '0 1:2:4' day to second order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1381,28 +1709,66 @@ STAGE PLANS: TableScan alias: vector_interval_2 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterTimestampScalarEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarNotEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarLessEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarLessTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarGreaterEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: DateColSubtractIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarGreaterTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: DateColSubtractIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampColEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampColNotEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: DateColSubtractIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: DateColSubtractIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampColEqualTimestampColumn(col 0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampColNotEqualTimestampColumn(col 0, col 
6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessEqualTimestampColumn(col 0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessTimestampColumn(col 0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterEqualTimestampColumn(col 0, col 6)(children: DateColSubtractIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterTimestampColumn(col 0, col 6)(children: DateColSubtractIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean) -> boolean predicate: ((2001-01-01 01:02:03.0 = (dt + 0 01:02:03.000000000)) and (2001-01-01 01:02:03.0 <> (dt + 0 01:02:04.000000000)) and (2001-01-01 01:02:03.0 <= (dt + 0 01:02:03.000000000)) and (2001-01-01 01:02:03.0 < (dt + 0 01:02:04.000000000)) and (2001-01-01 01:02:03.0 >= (dt - 0 01:02:03.000000000)) and (2001-01-01 01:02:03.0 > (dt - 0 01:02:04.000000000)) and ((dt + 0 01:02:03.000000000) = 2001-01-01 01:02:03.0) and ((dt + 0 01:02:04.000000000) <> 2001-01-01 01:02:03.0) and ((dt + 0 01:02:03.000000000) >= 2001-01-01 01:02:03.0) and ((dt + 0 01:02:04.000000000) > 2001-01-01 01:02:03.0) and ((dt - 0 01:02:03.000000000) <= 2001-01-01 01:02:03.0) and ((dt - 0 01:02:04.000000000) < 2001-01-01 01:02:03.0) and (ts = (dt + 0 01:02:03.000000000)) and (ts <> (dt + 0 01:02:04.000000000)) and (ts <= (dt + 0 01:02:03.000000000)) and (ts < (dt + 0 01:02:04.000000000)) and (ts >= (dt - 0 01:02:03.000000000)) and (ts > (dt - 0 01:02:04.000000000))) (type: boolean) Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 394 Basic 
stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1468,7 +1834,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_2 #### A masked pattern was here #### 2001-01-01 01:02:03 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ts from vector_interval_2 where timestamp '2001-01-01 01:02:03' = ts + interval '0' day @@ -1493,7 +1859,7 @@ where and ts > ts - interval '1' day order by ts PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ts from vector_interval_2 where timestamp '2001-01-01 01:02:03' = ts + interval '0' day @@ -1518,6 +1884,10 @@ where and ts > ts - interval '1' day order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1535,28 +1905,66 @@ STAGE PLANS: TableScan alias: vector_interval_2 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterTimestampScalarEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 0 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarNotEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarLessEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarLessTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarGreaterEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarGreaterTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 0 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColNotEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 
00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColEqualTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 0 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColNotEqualTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessEqualTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterEqualTimestampColumn(col 0, col 6)(children: TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterTimestampColumn(col 0, col 6)(children: TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean) -> boolean predicate: ((2001-01-01 01:02:03.0 = (ts + 0 00:00:00.000000000)) and (2001-01-01 01:02:03.0 <> (ts + 1 00:00:00.000000000)) and (2001-01-01 01:02:03.0 <= (ts + 1 00:00:00.000000000)) and (2001-01-01 01:02:03.0 < (ts + 1 00:00:00.000000000)) and (2001-01-01 01:02:03.0 >= (ts - 1 00:00:00.000000000)) and (2001-01-01 01:02:03.0 > (ts - 1 00:00:00.000000000)) and ((ts + 0 00:00:00.000000000) = 2001-01-01 01:02:03.0) and ((ts + 1 00:00:00.000000000) <> 2001-01-01 01:02:03.0) and ((ts + 1 00:00:00.000000000) >= 2001-01-01 01:02:03.0) and ((ts + 1 00:00:00.000000000) > 2001-01-01 01:02:03.0) and ((ts - 1 00:00:00.000000000) <= 2001-01-01 01:02:03.0) and ((ts - 1 00:00:00.000000000) < 2001-01-01 01:02:03.0) and (ts = (ts + 0 00:00:00.000000000)) and (ts <> (ts + 1 00:00:00.000000000)) and (ts <= (ts + 1 00:00:00.000000000)) and (ts < (ts + 1 00:00:00.000000000)) and (ts >= (ts - 1 00:00:00.000000000)) and (ts > (ts - 1 00:00:00.000000000))) (type: boolean) Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out index 391c775..ee8aa0c 100644 --- ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out +++ ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out @@ -35,7 +35,7 @@ POSTHOOK: Output: default@interval_arithmetic_1 POSTHOOK: Lineage: interval_arithmetic_1.dateval EXPRESSION [(unique_timestamps)unique_timestamps.FieldSchema(name:tsval, type:timestamp, comment:null), ] POSTHOOK: Lineage: interval_arithmetic_1.tsval SIMPLE [(unique_timestamps)unique_timestamps.FieldSchema(name:tsval, type:timestamp, comment:null), ] tsval tsval -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dateval, dateval - interval '2-2' year to month, @@ -47,7 +47,7 @@ select from interval_arithmetic_1 order by dateval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dateval, dateval - interval '2-2' year to month, @@ -60,6 +60,10 @@ from interval_arithmetic_1 order by dateval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -77,26 +81,61 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: dateval (type: date), (dateval - 2-2) (type: date), (dateval - -2-2) (type: date), (dateval + 2-2) (type: date), (dateval + -2-2) (type: date), (-2-2 + dateval) (type: date), (2-2 + dateval) (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 3, 4, 5, 6, 7] + selectExpressions: DateColSubtractIntervalYearMonthScalar(col 0, val 2-2) -> 2:long, DateColSubtractIntervalYearMonthScalar(col 0, val -2-2) -> 3:long, DateColAddIntervalYearMonthScalar(col 0, val 2-2) -> 4:long, DateColAddIntervalYearMonthScalar(col 0, val -2-2) -> 5:long, IntervalYearMonthScalarAddDateColumn(val -2-2, col 0) -> 6:long, IntervalYearMonthScalarAddDateColumn(val 2-2, col 0) -> 7:long Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] 
IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: date), _col4 (type: date), _col5 (type: date), _col6 (type: date) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: date), VALUE._col2 (type: date), VALUE._col3 (type: date), VALUE._col4 (type: date), VALUE._col5 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -186,7 +225,7 @@ dateval _c1 _c2 _c3 _c4 _c5 _c6 9075-06-13 9073-04-13 9077-08-13 9077-08-13 9073-04-13 9073-04-13 9077-08-13 9209-11-11 9207-09-11 9212-01-11 9212-01-11 9207-09-11 9207-09-11 9212-01-11 9403-01-09 9400-11-09 9405-03-09 9405-03-09 9400-11-09 9400-11-09 9405-03-09 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dateval, dateval - date '1999-06-07', @@ -195,7 +234,7 @@ select from interval_arithmetic_1 order by dateval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dateval, dateval - date '1999-06-07', @@ -205,6 +244,10 @@ from interval_arithmetic_1 order by dateval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -222,26 +265,61 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: dateval (type: date), (dateval - 1999-06-07) (type: interval_day_time), (1999-06-07 - dateval) (type: interval_day_time), (dateval - dateval) (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 3, 4] + selectExpressions: DateColSubtractDateScalar(col 0, val 1999-06-07 00:00:00.0) -> 2:timestamp, DateScalarSubtractDateColumn(val 1999-06-07 00:00:00.0, col 0) -> 3:timestamp, DateColSubtractDateColumn(col 0, 
col 0) -> 4:timestamp Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -325,7 +403,7 @@ dateval _c1 _c2 _c3 9075-06-13 2584462 00:00:00.000000000 -2584462 00:00:00.000000000 0 00:00:00.000000000 9209-11-11 2633556 01:00:00.000000000 -2633556 01:00:00.000000000 0 00:00:00.000000000 9403-01-09 2704106 01:00:00.000000000 -2704106 01:00:00.000000000 0 00:00:00.000000000 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tsval, tsval - interval '2-2' year to month, @@ -337,7 +415,7 @@ select from interval_arithmetic_1 order by tsval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tsval, tsval - interval '2-2' year to month, @@ -350,6 +428,10 @@ from interval_arithmetic_1 order by tsval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -367,26 +449,61 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: tsval (type: timestamp), (tsval - 2-2) (type: timestamp), (tsval - -2-2) (type: timestamp), (tsval + 2-2) (type: timestamp), (tsval + -2-2) (type: timestamp), (-2-2 + tsval) (type: timestamp), (2-2 + tsval) (type: 
timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7] + selectExpressions: TimestampColSubtractIntervalYearMonthScalar(col 1, val 2-2) -> 2:timestamp, TimestampColSubtractIntervalYearMonthScalar(col 1, val -2-2) -> 3:timestamp, TimestampColAddIntervalYearMonthScalar(col 1, val 2-2) -> 4:timestamp, TimestampColAddIntervalYearMonthScalar(col 1, val -2-2) -> 5:timestamp, IntervalYearMonthScalarAddTimestampColumn(val -2-2, col 1) -> 6:timestamp, IntervalYearMonthScalarAddTimestampColumn(val 2-2, col 1) -> 7:timestamp Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -476,7 +593,7 @@ tsval _c1 _c2 _c3 _c4 _c5 _c6 9075-06-13 16:20:09.218517797 9073-04-13 16:20:09.218517797 9077-08-13 16:20:09.218517797 9077-08-13 16:20:09.218517797 9073-04-13 16:20:09.218517797 9073-04-13 16:20:09.218517797 9077-08-13 16:20:09.218517797 9209-11-11 04:08:58.223768453 9207-09-11 05:08:58.223768453 9212-01-11 04:08:58.223768453 9212-01-11 04:08:58.223768453 9207-09-11 05:08:58.223768453 9207-09-11 05:08:58.223768453 9212-01-11 04:08:58.223768453 9403-01-09 18:12:33.547 9400-11-09 18:12:33.547 9405-03-09 18:12:33.547 9405-03-09 18:12:33.547 9400-11-09 18:12:33.547 
9400-11-09 18:12:33.547 9405-03-09 18:12:33.547 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select interval '2-2' year to month + interval '3-3' year to month, interval '2-2' year to month - interval '3-3' year to month @@ -484,7 +601,7 @@ from interval_arithmetic_1 order by interval '2-2' year to month + interval '3-3' year to month limit 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select interval '2-2' year to month + interval '3-3' year to month, interval '2-2' year to month - interval '3-3' year to month @@ -493,6 +610,10 @@ order by interval '2-2' year to month + interval '3-3' year to month limit 2 POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -510,27 +631,65 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 50 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: CAST( 5-5 AS INTERVAL YEAR TO MONTH) (type: interval_year_month) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 50 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: 5-5 (type: interval_year_month), -1-1 (type: interval_year_month) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2] + selectExpressions: ConstantVectorExpression(val 65) -> 1:long, ConstantVectorExpression(val -13) -> 2:long Statistics: Num rows: 50 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat 
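Worth noting in the plan above: because both operands are literals, interval '2-2' year to month + interval '3-3' year to month is folded at compile time, so the vectorizer emits ConstantVectorExpression rather than an add expression. Year-to-month intervals are encoded as a total month count, which is why the plan shows val 65 (26 + 39 months, printed as 5-5) and val -13 (26 - 39 months, printed as -1-1). A minimal sketch that exercises the same folding, assuming the interval_arithmetic_1 table from this test:

    explain vectorization expression
    select
      interval '2-2' year to month + interval '3-3' year to month,
      interval '2-2' year to month - interval '3-3' year to month
    from interval_arithmetic_1
    order by interval '2-2' year to month + interval '3-3' year to month
    limit 2;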
@@ -564,7 +723,7 @@ POSTHOOK: Input: default@interval_arithmetic_1 _c0 _c1 5-5 -1-1 5-5 -1-1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dateval, dateval - interval '99 11:22:33.123456789' day to second, @@ -576,7 +735,7 @@ select from interval_arithmetic_1 order by dateval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dateval, dateval - interval '99 11:22:33.123456789' day to second, @@ -589,6 +748,10 @@ from interval_arithmetic_1 order by dateval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -606,26 +769,61 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: dateval (type: date), (dateval - 99 11:22:33.123456789) (type: timestamp), (dateval - -99 11:22:33.123456789) (type: timestamp), (dateval + 99 11:22:33.123456789) (type: timestamp), (dateval + -99 11:22:33.123456789) (type: timestamp), (-99 11:22:33.123456789 + dateval) (type: timestamp), (99 11:22:33.123456789 + dateval) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 3, 4, 5, 6, 7] + selectExpressions: DateColSubtractIntervalDayTimeScalar(col 0, val 99 11:22:33.123456789) -> 2:timestamp, DateColSubtractIntervalDayTimeScalar(col 0, val -99 11:22:33.123456789) -> 3:timestamp, DateColAddIntervalDayTimeScalar(col 0, val 99 11:22:33.123456789) -> 4:timestamp, DateColAddIntervalDayTimeScalar(col 0, val -99 11:22:33.123456789) -> 5:timestamp, IntervalDayTimeScalarAddDateColumn(val -99 11:22:33.123456789, col 0) -> 6:timestamp, IntervalDayTimeScalarAddDateColumn(val 99 11:22:33.123456789, col 0) -> 7:timestamp Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -715,7 +913,7 @@ dateval _c1 _c2 _c3 _c4 _c5 _c6 9075-06-13 9075-03-05 11:37:26.876543211 9075-09-20 11:22:33.123456789 9075-09-20 11:22:33.123456789 9075-03-05 11:37:26.876543211 9075-03-05 11:37:26.876543211 9075-09-20 11:22:33.123456789 9209-11-11 9209-08-03 13:37:26.876543211 9210-02-18 11:22:33.123456789 9210-02-18 11:22:33.123456789 9209-08-03 13:37:26.876543211 9209-08-03 13:37:26.876543211 9210-02-18 11:22:33.123456789 9403-01-09 9402-10-01 13:37:26.876543211 9403-04-18 12:22:33.123456789 9403-04-18 12:22:33.123456789 9402-10-01 13:37:26.876543211 9402-10-01 13:37:26.876543211 9403-04-18 12:22:33.123456789 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dateval, tsval, @@ -725,7 +923,7 @@ select from interval_arithmetic_1 order by dateval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dateval, tsval, @@ -736,6 +934,10 @@ from interval_arithmetic_1 order by dateval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -753,26 +955,61 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: dateval (type: date), tsval (type: timestamp), (dateval - tsval) (type: interval_day_time), (tsval - dateval) (type: interval_day_time), (tsval - tsval) (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + selectExpressions: DateColSubtractTimestampColumn(col 0, col 1) -> 2:interval_day_time, TimestampColSubtractDateColumn(col 1, col 0) -> 3:interval_day_time, TimestampColSubtractTimestampColumn(col 1, col 1) -> 4:interval_day_time Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: 
_col1 (type: timestamp), _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col4 (type: interval_day_time) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: timestamp), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -858,7 +1095,7 @@ dateval tsval _c2 _c3 _c4 9075-06-13 9075-06-13 16:20:09.218517797 -0 16:20:09.218517797 0 16:20:09.218517797 0 00:00:00.000000000 9209-11-11 9209-11-11 04:08:58.223768453 -0 04:08:58.223768453 0 04:08:58.223768453 0 00:00:00.000000000 9403-01-09 9403-01-09 18:12:33.547 -0 18:12:33.547000000 0 18:12:33.547000000 0 00:00:00.000000000 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tsval, tsval - interval '99 11:22:33.123456789' day to second, @@ -870,7 +1107,7 @@ select from interval_arithmetic_1 order by tsval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tsval, tsval - interval '99 11:22:33.123456789' day to second, @@ -883,6 +1120,10 @@ from interval_arithmetic_1 order by tsval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -900,26 +1141,61 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: tsval (type: timestamp), (tsval - 99 11:22:33.123456789) (type: timestamp), (tsval - -99 11:22:33.123456789) (type: timestamp), (tsval + 99 11:22:33.123456789) (type: timestamp), (tsval + -99 11:22:33.123456789) (type: timestamp), (-99 11:22:33.123456789 + tsval) (type: timestamp), (99 11:22:33.123456789 + tsval) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7] + selectExpressions: TimestampColSubtractIntervalDayTimeScalar(col 1, val 99 11:22:33.123456789) -> 2:timestamp, TimestampColSubtractIntervalDayTimeScalar(col 1, val -99 11:22:33.123456789) -> 3:timestamp, TimestampColAddIntervalDayTimeScalar(col 
1, val 99 11:22:33.123456789) -> 4:timestamp, TimestampColAddIntervalDayTimeScalar(col 1, val -99 11:22:33.123456789) -> 5:timestamp, IntervalDayTimeScalarAddTimestampColumn(val -99 11:22:33.123456789, col 1) -> 6:timestamp, IntervalDayTimeScalarAddTimestampColumn(val 99 11:22:33.123456789, col 1) -> 7:timestamp Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1009,14 +1285,14 @@ tsval _c1 _c2 _c3 _c4 _c5 _c6 9075-06-13 16:20:09.218517797 9075-03-06 03:57:36.095061008 9075-09-21 03:42:42.341974586 9075-09-21 03:42:42.341974586 9075-03-06 03:57:36.095061008 9075-03-06 03:57:36.095061008 9075-09-21 03:42:42.341974586 9209-11-11 04:08:58.223768453 9209-08-03 17:46:25.100311664 9210-02-18 15:31:31.347225242 9210-02-18 15:31:31.347225242 9209-08-03 17:46:25.100311664 9209-08-03 17:46:25.100311664 9210-02-18 15:31:31.347225242 9403-01-09 18:12:33.547 9402-10-02 07:50:00.423543211 9403-04-19 06:35:06.670456789 9403-04-19 06:35:06.670456789 9402-10-02 07:50:00.423543211 9402-10-02 07:50:00.423543211 9403-04-19 06:35:06.670456789 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select interval '99 11:22:33.123456789' day to second + interval '10 9:8:7.123456789' day to second, interval '99 11:22:33.123456789' day to second - interval '10 9:8:7.123456789' day to second 
from interval_arithmetic_1 limit 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select interval '99 11:22:33.123456789' day to second + interval '10 9:8:7.123456789' day to second, interval '99 11:22:33.123456789' day to second - interval '10 9:8:7.123456789' day to second @@ -1024,6 +1300,10 @@ from interval_arithmetic_1 limit 2 POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1038,15 +1318,29 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: 109 20:30:40.246913578 (type: interval_day_time), 89 02:14:26.000000000 (type: interval_day_time) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3] + selectExpressions: ConstantVectorExpression(val 109 20:30:40.246913578) -> 2:interval_day_time, ConstantVectorExpression(val 89 02:14:26.000000000) -> 3:interval_day_time Statistics: Num rows: 50 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1054,6 +1348,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out index 0bc0e4c..53c728b 100644 --- ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out @@ -136,7 +136,7 @@ POSTHOOK: Lineage: vectortab_b_1korc.si SIMPLE [(vectortab_b_1k)vectortab_b_1k.F POSTHOOK: Lineage: vectortab_b_1korc.t SIMPLE [(vectortab_b_1k)vectortab_b_1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab_b_1korc.ts SIMPLE [(vectortab_b_1k)vectortab_b_1k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab_b_1korc.ts2 SIMPLE [(vectortab_b_1k)vectortab_b_1k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select v1.s, v2.s, @@ -158,7 +158,7 @@ join on v1.intrvl1 = v2.intrvl2 and v1.s = v2.s PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select v1.s, v2.s, @@ -180,6 +180,10 @@ join on v1.intrvl1 = v2.intrvl2 and v1.s = v2.s POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -197,12 +201,24 @@ STAGE PLANS: TableScan alias: vectortab_a_1korc Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 14)(children: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp) -> boolean) -> boolean predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean) Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 14] + selectExpressions: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -210,6 +226,10 @@ STAGE PLANS: keys: 0 _col0 (type: string), _col1 (type: interval_day_time) 1 _col0 (type: string), _col1 (type: interval_day_time) + Map Join Vectorization: + className: VectorMapJoinInnerBigOnlyMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 @@ -217,9 +237,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col2 (type: string), _col1 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 8, 14] Statistics: Num rows: 1100 Data size: 506290 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1100 Data size: 506290 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -227,25 +254,57 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: vectortab_b_1korc Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 
14)(children: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp) -> boolean) -> boolean predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean) Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 14] + selectExpressions: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: interval_day_time) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: interval_day_time) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_join.q.out ql/src/test/results/clientpositive/llap/vector_join.q.out new file mode 100644 index 0000000..94c0290 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_join.q.out @@ -0,0 +1,104 @@ +PREHOOK: query: DROP TABLE IF EXISTS test1_vc +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS test1_vc +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS test2_vc +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS test2_vc +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE test1_vc + ( + id string) + PARTITIONED BY ( + cr_year bigint, + cr_month bigint) + ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' +TBLPROPERTIES ( + 'serialization.null.format'='' ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test1_vc +POSTHOOK: query: CREATE TABLE test1_vc + ( + id string) + PARTITIONED BY ( + cr_year bigint, + cr_month bigint) + ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' +TBLPROPERTIES ( + 'serialization.null.format'='' ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test1_vc +PREHOOK: query: CREATE TABLE test2_vc( + id string + ) + PARTITIONED BY ( + cr_year bigint, + cr_month bigint) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' +STORED AS INPUTFORMAT 
+ 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' +TBLPROPERTIES ( + 'serialization.null.format'='' + ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test2_vc +POSTHOOK: query: CREATE TABLE test2_vc( + id string + ) + PARTITIONED BY ( + cr_year bigint, + cr_month bigint) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' +TBLPROPERTIES ( + 'serialization.null.format'='' + ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test2_vc +PREHOOK: query: SELECT cr.id1 , +cr.id2 +FROM +(SELECT t1.id id1, + t2.id id2 + from + (select * from test1_vc ) t1 + left outer join test2_vc t2 + on t1.id=t2.id) cr +PREHOOK: type: QUERY +PREHOOK: Input: default@test1_vc +PREHOOK: Input: default@test2_vc +#### A masked pattern was here #### +POSTHOOK: query: SELECT cr.id1 , +cr.id2 +FROM +(SELECT t1.id id1, + t2.id id2 + from + (select * from test1_vc ) t1 + left outer join test2_vc t2 + on t1.id=t2.id) cr +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1_vc +POSTHOOK: Input: default@test2_vc +#### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/llap/vector_join30.q.out ql/src/test/results/clientpositive/llap/vector_join30.q.out index d7e5f58..5839815 100644 --- ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -10,7 +10,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@orcsrc POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -18,7 +18,7 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -26,6 +26,10 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -45,44 +49,103 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -90,29 +153,60 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinInnerStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true 
outputColumnNames: _col2, _col3 input vertices: 1 Reducer 5 Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) + Group By Vectorization: + aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 2:int) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -120,15 +214,30 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) @@ -157,7 +266,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was 
here #### 103231310608 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -165,7 +274,7 @@ LEFT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -173,6 +282,10 @@ LEFT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -192,38 +305,89 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: 
false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -231,29 +395,60 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinOuterStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col2, _col3 input vertices: 1 Reducer 5 Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) + Group By Vectorization: + aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 3:int) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -261,15 +456,30 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + 
usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) @@ -298,7 +508,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 103231310608 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -306,7 +516,7 @@ RIGHT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -314,6 +524,10 @@ RIGHT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -333,50 +547,116 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) 
outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -384,29 +664,60 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinOuterStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS 
true outputColumnNames: _col2, _col3 input vertices: 0 Reducer 2 Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) + Group By Vectorization: + aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 2:int) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -438,7 +749,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 103231310608 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -449,7 +760,7 @@ JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -460,6 +771,10 @@ JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -480,63 +795,146 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic 
stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 6 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort 
order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -546,6 +944,11 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: One MapJoin Condition IS false outputColumnNames: _col2, _col3 input vertices: 1 Reducer 5 @@ -553,23 +956,50 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) + Group By Vectorization: + aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 2:int) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: 
sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -577,28 +1007,58 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 7 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -632,7 +1092,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 348019368476 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -643,7 +1103,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) 
PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -654,6 +1114,10 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -675,59 +1139,134 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 7 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator 
expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap @@ -744,6 +1283,10 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -753,14 +1296,30 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -768,28 +1327,58 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 6
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
                 outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [1, 0]
                 Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkStringOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                   Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: string)
         Reducer 8
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: VALUE._col0 (type: string)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [1]
                 Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkStringOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                   Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
 
   Stage: Stage-0
@@ -823,7 +1412,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orcsrc
 #### A masked pattern was here ####
 348019368476
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 LEFT OUTER JOIN
@@ -834,7 +1423,7 @@ LEFT OUTER JOIN
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 LEFT OUTER JOIN
@@ -845,6 +1434,10 @@ LEFT OUTER JOIN
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value))
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -866,59 +1459,134 @@ STAGE PLANS:
                 TableScan
                   alias: orcsrc
                   Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0]
                     Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: string)
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 5
             Map Operator Tree:
                 TableScan
                   alias: orcsrc
                   Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1]
                     Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col1 (type: string)
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 7
             Map Operator Tree:
                 TableScan
                   alias: orcsrc
                   Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1]
                     Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col1 (type: string)
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: string)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                 Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkStringOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                   Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
         Reducer 3
             Execution mode: llap
@@ -935,6 +1603,10 @@ STAGE PLANS:
                 Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: sum(hash(_col2,_col3))
+                  Group By Vectorization:
+                      vectorOutput: false
+                      native: false
+                      projectedOutputColumns: null
                   mode: hash
                   outputColumnNames: _col0
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -944,14 +1616,30 @@ STAGE PLANS:
                     value expressions: _col0 (type: bigint)
         Reducer 4
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -959,28 +1647,58 @@ STAGE PLANS:
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 6
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
                 outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [1, 0]
                 Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkStringOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                   Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: string)
         Reducer 8
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: VALUE._col0 (type: string)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [1]
                 Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkStringOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                   Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
 
   Stage: Stage-0
@@ -1014,7 +1732,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orcsrc
 #### A masked pattern was here ####
 348019368476
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 LEFT OUTER JOIN
@@ -1025,7 +1743,7 @@ RIGHT OUTER JOIN
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 LEFT OUTER JOIN
@@ -1036,6 +1754,10 @@ RIGHT OUTER JOIN
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value))
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1057,59 +1779,134 @@ STAGE PLANS:
                 TableScan
                   alias: orcsrc
                   Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0]
                     Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: string)
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 5
            Map Operator Tree:
                TableScan
                  alias: orcsrc
                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Select Operator
                    expressions: key (type: string), value (type: string)
                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1]
                    Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col1 (type: string)
                      sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                      Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                      value expressions: _col0 (type: string)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 7
            Map Operator Tree:
                TableScan
                  alias: orcsrc
                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Select Operator
                    expressions: key (type: string), value (type: string)
                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1]
                    Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col1 (type: string)
                      sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                      Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                      value expressions: _col0 (type: string)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string)
                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkStringOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
        Reducer 3
            Execution mode: llap
@@ -1126,6 +1923,10 @@ STAGE PLANS:
                Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  aggregations: sum(hash(_col2,_col3))
+                  Group By Vectorization:
+                      vectorOutput: false
+                      native: false
+                      projectedOutputColumns: null
                  mode: hash
                  outputColumnNames: _col0
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -1135,14 +1936,30 @@ STAGE PLANS:
                    value expressions: _col0 (type: bigint)
        Reducer 4
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1150,28 +1967,58 @@ STAGE PLANS:
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
        Reducer 6
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [1, 0]
                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkStringOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: string)
        Reducer 8
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col0 (type: string)
                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [1]
                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkStringOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
 
   Stage: Stage-0
@@ -1205,7 +2052,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orcsrc
 #### A masked pattern was here ####
 348019368476
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 RIGHT OUTER JOIN
@@ -1216,7 +2063,7 @@ RIGHT OUTER JOIN
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 RIGHT OUTER JOIN
@@ -1227,6 +2074,10 @@ RIGHT OUTER JOIN
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value))
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1248,59 +2099,134 @@ STAGE PLANS:
                 TableScan
                   alias: orcsrc
                   Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0]
                     Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: string)
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 5
            Map Operator Tree:
                TableScan
                  alias: orcsrc
                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Select Operator
                    expressions: key (type: string), value (type: string)
                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1]
                    Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col1 (type: string)
                      sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                      Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                      value expressions: _col0 (type: string)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 7
            Map Operator Tree:
                TableScan
                  alias: orcsrc
                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Select Operator
                    expressions: key (type: string), value (type: string)
                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1]
                    Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col1 (type: string)
                      sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                      Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                      value expressions: _col0 (type: string)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string)
                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkStringOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
        Reducer 3
            Execution mode: llap
@@ -1317,6 +2243,10 @@ STAGE PLANS:
                Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  aggregations: sum(hash(_col2,_col3))
+                  Group By Vectorization:
+                      vectorOutput: false
+                      native: false
+                      projectedOutputColumns: null
                  mode: hash
                  outputColumnNames: _col0
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -1326,14 +2256,30 @@ STAGE PLANS:
                    value expressions: _col0 (type: bigint)
        Reducer 4
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1341,28 +2287,58 @@ STAGE PLANS:
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
        Reducer 6
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [1, 0]
                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkStringOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: string)
        Reducer 8
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col0 (type: string)
                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [1]
                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkStringOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
 
   Stage: Stage-0
diff --git ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out
index 7c9da3e..95dcba9 100644
--- ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out
+++ ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out
@@ -95,9 +95,9 @@ POSTHOOK: type: SHOWPARTITIONS
 POSTHOOK: Input: default@char_tbl2
 gpa=3
 gpa=3.5
-PREHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
+PREHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
+POSTHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
diff --git ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
index e8dfc30..767d978 100644
--- ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
+++ ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization
 select count(*) from (select c.ctinyint
 from alltypesorc c
 left outer join alltypesorc cd
@@ -7,7 +7,7 @@ left outer join alltypesorc hd
   on hd.ctinyint = c.ctinyint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization
 select count(*) from (select c.ctinyint
 from alltypesorc c
 left outer join alltypesorc cd
@@ -16,6 +16,10 @@ left outer join alltypesorc hd
   on hd.ctinyint = c.ctinyint
 ) t1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -68,6 +72,14 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3
             Map Operator Tree:
                 TableScan
@@ -84,6 +96,14 @@ STAGE PLANS:
                       Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 4
            Map Operator Tree:
                TableScan
@@ -100,8 +120,23 @@ STAGE PLANS:
                      Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
diff --git ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out
index 57d63b3..9550114 100644
--- ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out
+++ ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out
@@ -76,12 +76,16 @@ POSTHOOK: Output: default@tjoin2
 POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ]
 POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ]
 POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -166,12 +170,16 @@ POSTHOOK: Input: default@tjoin2
 0	10	15	NULL
 1	20	25	NULL
 2	NULL	50	NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -256,12 +264,16 @@ POSTHOOK: Input: default@tjoin2
 0	10	15	NULL
 1	20	25	NULL
 2	NULL	50	NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -279,9 +291,16 @@ STAGE PLANS:
                 TableScan
                   alias: tjoin1
                   Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2]
                   Select Operator
                     expressions: rnum (type: int), c1 (type: int), c2 (type: int)
                     outputColumnNames: _col0, _col1, _col2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2]
                     Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
                       condition map:
@@ -292,6 +311,11 @@ STAGE PLANS:
                       keys:
                         0 _col1 (type: int)
                         1 _col0 (type: int)
+                      Map Join Vectorization:
+                          className: VectorMapJoinOuterFilteredOperator
+                          native: false
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
                       outputColumnNames: _col0, _col1, _col2, _col4
                       input vertices:
                         1 Map 2
@@ -299,9 +323,16 @@ STAGE PLANS:
                       Select Operator
                         expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2))
                         outputColumnNames: _col0, _col1, _col2, _col3
+                        Select Vectorization:
+                            className: VectorSelectOperator
+                            native: true
+                            projectedOutputColumns: [0, 1, 2, 3]
                         Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
+                          File Sink Vectorization:
+                              className: VectorFileSinkOperator
+                              native: false
                           Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -309,23 +340,50 @@ STAGE PLANS:
                               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 2
             Map Operator Tree:
                 TableScan
                   alias: tjoin2
                   Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2]
                   Select Operator
                     expressions: c1 (type: int), c2 (type: char(2))
                     outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [1, 2]
                     Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: int)
                       sort order: +
                       Map-reduce partition columns: _col0 (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkLongOperator
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: char(2))
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -346,12 +404,16 @@ POSTHOOK: Input: default@tjoin2
 0	10	15	NULL
 1	20	25	NULL
 2	NULL	50	NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -369,9 +431,16 @@ STAGE PLANS:
                 TableScan
                   alias: tjoin1
                   Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2]
                   Select Operator
                     expressions: rnum (type: int), c1 (type: int), c2 (type: int)
                     outputColumnNames: _col0, _col1, _col2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2]
                     Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
                       condition map:
@@ -382,6 +451,11 @@ STAGE PLANS:
                       keys:
                         0 _col1 (type: int)
                         1 _col0 (type: int)
+                      Map Join Vectorization:
+                          className: VectorMapJoinOuterFilteredOperator
+                          native: false
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
                       outputColumnNames: _col0, _col1, _col2, _col4
                       input vertices:
                         1 Map 2
@@ -389,9 +463,16 @@ STAGE PLANS:
                       Select Operator
                         expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2))
                         outputColumnNames: _col0, _col1, _col2, _col3
+                        Select Vectorization:
+                            className: VectorSelectOperator
+                            native: true
+                            projectedOutputColumns: [0, 1, 2, 3]
                         Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
+                          File Sink Vectorization:
+                              className: VectorFileSinkOperator
+                              native: false
                           Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -399,23 +480,50 @@ STAGE PLANS:
                               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 2
             Map Operator Tree:
                 TableScan
                   alias: tjoin2
                   Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2]
                   Select Operator
                     expressions: c1 (type: int), c2 (type: char(2))
                     outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [1, 2]
                     Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: int)
                       sort order: +
                       Map-reduce partition columns: _col0 (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkLongOperator
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: char(2))
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -436,12 +544,16 @@ POSTHOOK: Input: default@tjoin2
 0	10	15	NULL
 1	20	25	NULL
 2	NULL	50	NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -459,9 +571,16 @@ STAGE PLANS:
                 TableScan
                   alias: tjoin1
                   Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2]
                   Select Operator
                     expressions: rnum (type: int), c1 (type: int), c2 (type: int)
                     outputColumnNames: _col0, _col1, _col2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2]
                     Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
                       condition map:
@@ -472,6 +591,10 @@ STAGE PLANS:
                       keys:
                         0 _col1 (type: int)
                         1 _col0 (type: int)
+                      Map Join Vectorization:
+                          className: VectorMapJoinOuterLongOperator
+                          native: true
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
                       outputColumnNames: _col0, _col1, _col2, _col4
                       input vertices:
                         1 Map 2
@@ -479,9 +602,16 @@ STAGE PLANS:
                       Select Operator
                         expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2))
                         outputColumnNames: _col0, _col1, _col2, _col3
+                        Select Vectorization:
+                            className: VectorSelectOperator
+                            native: true
+                            projectedOutputColumns: [0, 1, 2, 3]
                         Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
+                          File Sink Vectorization:
+                              className: VectorFileSinkOperator
+                              native: false
                           Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -489,23 +619,50 @@ STAGE PLANS:
                               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 2
             Map Operator Tree:
                 TableScan
                   alias: tjoin2
                   Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2]
                   Select Operator
                     expressions: c1 (type: int), c2 (type: char(2))
                     outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [1, 2]
                     Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: int)
                       sort order: +
                       Map-reduce partition columns: _col0 (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkLongOperator
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: char(2))
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -526,12 +683,16 @@ POSTHOOK: Input: default@tjoin2
 0	10	15	NULL
 1	20	25	NULL
 2	NULL	50	NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -549,9 +710,16 @@ STAGE PLANS:
                 TableScan
                   alias: tjoin1
                   Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2]
                   Select Operator
                     expressions: rnum (type: int), c1 (type: int), c2 (type: int)
                     outputColumnNames: _col0, _col1, _col2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2]
                     Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
                       condition map:
@@ -562,6 +730,10 @@ STAGE PLANS:
                       keys:
                         0 _col1 (type: int)
                         1 _col0 (type: int)
+                      Map Join Vectorization:
+                          className: VectorMapJoinOuterLongOperator
+                          native: true
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
                       outputColumnNames: _col0, _col1, _col2, _col4
                       input vertices:
                         1 Map 2
@@ -569,9 +741,16 @@ STAGE PLANS:
                       Select Operator
                         expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2))
                         outputColumnNames: _col0, _col1, _col2, _col3
+                        Select Vectorization:
+                            className: VectorSelectOperator
+                            native: true
+                            projectedOutputColumns: [0, 1, 2, 3]
                         Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
+                          File Sink Vectorization:
+                              className: VectorFileSinkOperator
+                              native: false
                           Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -579,23 +758,50 @@ STAGE PLANS:
                               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 2
             Map Operator Tree:
                 TableScan
                   alias: tjoin2
                   Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE
Data size: 372 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2] Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out index 6300316..e2f111d 100644 --- ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out @@ -128,91 +128,17 @@ POSTHOOK: query: select * from t4 POSTHOOK: type: QUERY POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 +PREHOOK: query: explain vectorization only summary -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select 
Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -230,91 +156,15 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: 
key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -334,91 +184,15 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - 
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
PREHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
PREHOOK: type: QUERY
@@ -430,100 +204,20 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t4
#### A masked pattern was here ####
-PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+PREHOOK: query: explain vectorization only summary
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+POSTHOOK: query: explain vectorization only summary
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int)
-                        1 _col1 (type: int)
-                      outputColumnNames: _col1
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col1 (type: string)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string)
-                          sort order: +
-                          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (key < 15) (type: boolean)
-                    Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col1
-                      Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col1 (type: int), _col1 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col1 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col1 (type: int)
-                          Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-PREHOOK: Input: default@t3
+PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t3
#### A masked pattern was here ####
POSTHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
POSTHOOK: type: QUERY
@@ -541,91 +235,15 @@ val_5
val_5
val_8
val_9
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+PREHOOK: query: explain vectorization only summary
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+POSTHOOK: query: explain vectorization only summary
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col0, _col1
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int), _col1 (type: string)
-                        sort order: ++
-                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((value < 'val_10') and key is not null) (type: boolean)
-                    Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int), _col1 (type: string)
-                        mode: hash
-                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
PREHOOK: type: QUERY
@@ -640,95 +258,15 @@ POSTHOOK: Input: default@t2
0 val_0
0 val_0
0 val_0
-PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+PREHOOK: query: explain vectorization only summary
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+POSTHOOK: query: explain vectorization only summary
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 2 <- Map 1 (BROADCAST_EDGE)
-        Reducer 3 <- Map 2 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: t3
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (key > 5) (type: boolean)
-                    Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 2 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col1
-                      input vertices:
-                        1 Map 1
-                      Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col1 (type: string)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string)
-                          sort order: +
-                          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Reducer 3 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
PREHOOK: type: QUERY
@@ -743,95 +281,15 @@ POSTHOOK: Input: default@t3
val_10
val_8
val_9
-PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+PREHOOK: query: explain vectorization only summary
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+POSTHOOK: query: explain vectorization only summary
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 2 <- Map 1 (BROADCAST_EDGE)
-        Reducer 3 <- Map 2 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: t2
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((key > 5) and (value <= 'val_20')) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int), _col1 (type: string)
-                        mode: hash
-                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 2 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col1
-                      input vertices:
-                        1 Map 1
-                      Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col1 (type: string)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string)
-                          sort order: +
-                          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Reducer 3 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
PREHOOK: type: QUERY
@@ -843,91 +301,15 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
#### A masked pattern was here ####
-PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization only summary
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization only summary
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 2 <- Map 1 (BROADCAST_EDGE)
-        Reducer 3 <- Map 2 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: t1
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (key > 2) (type: boolean)
-                    Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 2 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col0, _col1
-                      input vertices:
-                        1 Map 1
-                      Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int), _col1 (type: string)
-                        sort order: ++
-                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Reducer 3 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
PREHOOK: type: QUERY
@@ -944,91 +326,15 @@ POSTHOOK: Input: default@t2
10 val_5
4 val_2
8 val_4
-PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
+PREHOOK: query: explain vectorization only summary
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
+POSTHOOK: query: explain vectorization only summary
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col0
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
PREHOOK: type: QUERY
@@ -1059,215 +365,39 @@ POSTHOOK: Input: default@t3
8
8
9
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization only summary
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization only summary
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
+PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
#### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int)
-                        1 (2 * _col0) (type: int)
-                      outputColumnNames: _col0, _col1
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int), _col1 (type: string)
-                        sort order: ++
-                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (2 * key) is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: (2 * _col0) (type: int)
-                          sort order: +
-                          Map-reduce partition columns: (2 * _col0) (type: int)
-                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-PREHOOK: Input: default@t2
-#### A masked pattern was here ####
-POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-POSTHOOK: Input: default@t2
+POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
#### A masked pattern was here ####
0 val_0
0 val_0
0 val_0
8 val_8
-PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
+PREHOOK: query: explain vectorization only summary
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization only summary
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Inner Join 0 to 1
-                           Left Semi Join 1 to 2
-                      keys:
-                        0 key (type: int)
-                        1 key (type: int)
-                        2 _col0 (type: int)
-                      outputColumnNames: _col0, _col1, _col5, _col6
-                      input vertices:
-                        1 Map 3
-                        2 Map 4
-                      Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
-                        outputColumnNames: _col0, _col1, _col2, _col3
-                        Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int), _col1 (type: string)
-                          sort order: ++
-                          Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col2 (type: int), _col3 (type: string)
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: value (type: string)
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
PREHOOK: type: QUERY
@@ -1295,636 +425,93 @@ POSTHOOK: Input: default@t3
10 val_10 10 val_5
4 val_4 4 val_2
8 val_8 8 val_4
-PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
+PREHOOK: query: explain vectorization only summary
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
+POSTHOOK: query: explain vectorization only summary
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
+PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t3
#### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t3
#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (key is not null and value is not null) (type: boolean)
-                    Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int), value (type: string)
-                        1 _col0 (type: int), _col1 (type: string)
-                      outputColumnNames: _col0, _col1
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int), _col1 (type: string)
-                        sort order: ++
-                        Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (key is not null and value is not null) (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int), _col1 (type: string)
-                        mode: hash
-                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int), _col1 (type: string)
-                          sort order: ++
-                          Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
-                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-PREHOOK: Input: default@t3
-#### A masked pattern was here ####
-POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-POSTHOOK: Input: default@t3
-#### A masked pattern was here ####
-0 val_0
-0 val_0
0 val_0
0 val_0
0 val_0
0 val_0
-10 val_10
-2 val_2
-4 val_4
-5 val_5
-5 val_5
-5 val_5
-8 val_8
-9 val_9
-PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                           Left Semi Join 0 to 2
-                      keys:
-                        0 key (type: int)
-                        1 _col0 (type: int)
-                        2 _col0 (type: int)
-                      outputColumnNames: _col0
-                      input vertices:
-                        1 Map 3
-                        2 Map 4
-                      Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-PREHOOK: Input: default@t2
-PREHOOK: Input: default@t3
-#### A masked pattern was here ####
-POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-POSTHOOK: Input: default@t2
-POSTHOOK: Input: default@t3
-#### A masked pattern was here ####
-0
-0
-0
-0
-0
-0
-10
-10
-10
-10
-4
-4
-8
-8
-PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Map Join Operator
-                    condition map:
-                         Left Outer Join0 to 1
-                         Left Semi Join 1 to 2
-                    keys:
-                      0 key (type: int)
-                      1 key (type: int)
-                      2 _col0 (type: int)
-                    outputColumnNames: _col0
-                    input vertices:
-                      1 Map 3
-                      2 Map 4
-                    Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: key (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: _col0 (type: int)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-PREHOOK: Input: default@t2
-PREHOOK: Input: default@t3
-#### A masked pattern was here ####
-POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-POSTHOOK: Input: default@t2
-POSTHOOK: Input: default@t3
-#### A masked pattern was here ####
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-10
-10
-10
-10
-4
-4
-8
-8
-PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 5 
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: key (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: _col0 (type: int)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Outer Join 0 to 1
-                     Left Semi Join 1 to 2
-                keys:
-                  0 key (type: int)
-                  1 key (type: int)
-                  2 _col0 (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-PREHOOK: Input: default@t2
-PREHOOK: Input: default@t3
-#### A masked pattern was here ####
-POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-POSTHOOK: Input: default@t2
-POSTHOOK: Input: default@t3
-#### A masked pattern was here ####
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-10
-10
-10
-10
-4
-4
-8
-8
-NULL
-NULL
-NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: key (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: _col0 (type: int)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 5 
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                     Left Outer Join0 to 2
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                  2 key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+0 val_0
+0 val_0
+10 val_10
+2 val_2
+4 val_4
+5 val_5
+5 val_5
+5 val_5
+8 val_8
+9 val_9
+PREHOOK: query: explain vectorization only summary
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization only summary
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
+PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+0
+0
+0
+0
+0
+0
+10
+10
+10
+10
+4
+4
+8
+8
+PREHOOK: query: explain vectorization only summary
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization only summary
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
-PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
PREHOOK: Input: default@t3
#### A masked pattern was here ####
-POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
@@ -1952,120 +539,27 @@ POSTHOOK: Input: default@t3
10
10
10
-16
-18
-20
4
4
8
8
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization only summary
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization only summary
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: key (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: _col0 (type: int)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 5 
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                     Right Outer Join0 to 2
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                  2 key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
-PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
PREHOOK: Input: default@t3
#### A masked pattern was here ####
-POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
@@ -2100,107 +594,119 @@ POSTHOOK: Input: default@t3
NULL
NULL
NULL
-NULL
-NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization only summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization only summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -STAGE PLANS: - Stage: Stage-1 - Tez +PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 #### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +16 +18 +20 +4 +4 +8 +8 +PREHOOK: query: explain vectorization only summary +select a.key from t3 
a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -2256,113 +762,15 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - 
Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY @@ -2411,83 +819,15 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 100) and value is not null) (type: boolean) - Statistics: Num rows: 7 
Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 2 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY @@ -2499,10 +839,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2601,10 +947,16 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: 
[hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2705,10 +1057,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2801,10 +1159,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: query: explain vectorization summary +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization summary +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2912,10 +1276,16 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3011,10 +1381,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization summary +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization summary +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3114,10 
+1490,16 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization summary +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization summary +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3214,10 +1596,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3315,10 +1703,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization summary +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization summary +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3430,10 +1824,16 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root 
stage Stage-0 depends on stages: Stage-1 @@ -3530,10 +1930,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3666,10 +2072,16 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3776,10 +2188,16 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization summary +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization summary +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3915,10 +2333,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization summary +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization summary +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: 
[hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4048,10 +2472,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization summary +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization summary +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4189,10 +2619,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4330,10 +2766,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4473,10 +2915,16 @@ NULL NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: 
[hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4627,10 +3075,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4782,10 +3236,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4870,10 +3330,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only operator +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only operator +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4881,80 +3347,84 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 
Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: 
false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -4972,10 +3442,16 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only operator +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only operator +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4983,80 +3459,84 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN 
IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t2 a left semi join 
t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -5076,10 +3556,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only operator +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only operator +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5087,80 +3573,84 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -5172,10 +3662,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: query: explain vectorization only operator +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization only operator +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 
is a root stage Stage-0 depends on stages: Stage-1 @@ -5183,84 +3679,87 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 15) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Vectorization: + className: VectorSelectOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY @@ -5283,10 +3782,16 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: query: explain vectorization only operator +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization only operator +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5294,80 +3799,84 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is 
not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((value < 'val_10') and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true 
+ Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY @@ -5382,10 +3891,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization only operator +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization only operator +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5393,84 +3908,87 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: t3 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 5) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - 
LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Vectorization: + className: VectorSelectOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + 
groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY @@ -5485,10 +4003,16 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization only operator +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization only operator +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5496,84 +4020,87 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: 
VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Vectorization: + className: VectorSelectOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num 
rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY @@ -5581,95 +4108,105 @@ PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -#### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 2) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +PREHOOK: query: explain vectorization only operator +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only operator +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select 
Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - 
compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -5686,10 +4223,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization only operator +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization only operator +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5697,80 +4240,84 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: 
true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY @@ -5801,10 +4348,16 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only 
operator +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only operator +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5812,80 +4365,84 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (2 * key) is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (2 * _col0) (type: 
int) - sort order: + - Map-reduce partition columns: (2 * _col0) (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -5901,10 +4458,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization only operator +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only operator +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5912,104 +4475,108 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- 
Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false + Select Vectorization: + className: VectorSelectOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY @@ -6037,10 +4604,16 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization only operator +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization only operator +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6048,80 +4621,84 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY @@ -6147,10 +4724,16 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from 
t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only operator +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only operator +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6158,107 +4741,112 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - 
outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce 
Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6286,10 +4874,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization only operator +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only operator +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6297,89 +4891,96 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 
Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6419,10 +5020,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization only operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6430,94 +5037,91 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format 
IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution 
mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6560,10 +5164,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6571,94 +5181,91 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: 
true Map 4 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN 
[tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6701,10 +5308,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6712,94 +5325,91 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key 
(type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Right Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: 
KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6844,105 +5454,108 @@ NULL NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs +PREHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + Reduce Sink Vectorization: + className: 
VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan Vectorization: + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6998,10 +5611,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7009,102 +5628,107 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No 
nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: 
Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY @@ -7153,10 +5777,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7164,72 +5794,68 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 2 (BROADCAST_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 100) and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic 
stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 2 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY @@ -7241,10 +5867,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7263,7 +5895,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7272,6 +5911,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -7279,22 +5923,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: 
VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -7303,18 +5982,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -7343,10 +6057,16 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root 
stage Stage-0 depends on stages: Stage-1 @@ -7365,7 +6085,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7374,6 +6101,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -7381,22 +6113,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -7405,18 +6172,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No 
buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -7447,10 +6249,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7469,7 +6277,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7478,6 +6293,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ 
-7485,22 +6305,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -7509,18 +6364,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + 
partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -7543,10 +6433,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: query: explain vectorization detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7565,7 +6461,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7574,6 +6477,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col1 input vertices: 1 Map 3 @@ -7581,26 +6489,65 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: 
true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 0, val 15) -> boolean predicate: (key < 15) (type: boolean) Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 0 + native: false + projectedOutputColumns: [] keys: _col1 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -7609,18 +6556,53 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -7654,10 +6636,16 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 
b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7676,7 +6664,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7685,6 +6680,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -7692,22 +6692,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterStringGroupColLessStringScalar(col 1, val val_10) -> boolean, SelectColumnIsNotNull(col 0) -> boolean) -> boolean predicate: ((value < 'val_10') and key is not null) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) 
outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -7716,18 +6751,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -7753,10 +6823,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7775,14 +6851,31 @@ STAGE PLANS: TableScan alias: 
t3 Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 5) -> boolean predicate: (key > 5) (type: boolean) Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -7791,15 +6884,39 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7808,6 +6925,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col1 input vertices: 1 Map 1 @@ -7815,22 +6937,62 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -7856,10 +7018,16 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7878,14 +7046,31 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0, val 5) -> boolean, FilterStringGroupColLessEqualStringScalar(col 1, val val_20) -> boolean) -> boolean predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: 
string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -7894,15 +7079,39 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7911,6 +7120,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col1 input vertices: 1 Map 1 @@ -7918,22 +7132,62 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + 
Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -7956,10 +7210,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7978,14 +7238,31 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (key > 2) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -7994,15 +7271,39 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + 
native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -8011,6 +7312,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1 input vertices: 1 Map 1 @@ -8018,18 +7324,54 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), 
KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -8057,10 +7399,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8079,7 +7427,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -8088,6 +7443,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0 input vertices: 1 Map 3 @@ -8095,22 +7455,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 
1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -8119,18 +7514,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -8172,10 +7602,16 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + 
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8194,7 +7630,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -8203,6 +7646,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 (2 * _col0) (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -8210,22 +7658,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2)(children: LongScalarMultiplyLongColumn(val 2, col 0) -> 2:long) -> boolean predicate: (2 * key) is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -8234,18 +7717,55 @@ STAGE PLANS: key expressions: (2 * _col0) (type: int) sort order: + Map-reduce partition columns: (2 * _col0) (type: int) + Reduce Sink Vectorization: + className: 
VectorReduceSinkLongOperator + keyExpressions: LongScalarMultiplyLongColumn(val 2, col 0) -> 1:long + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: bigint Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -8272,10 +7792,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8294,7 +7820,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -8305,6 +7838,11 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) 2 _col0 (type: int) + Map Join Vectorization: + 
className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 3 @@ -8313,43 +7851,107 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: string Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, 
value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -8358,18 +7960,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:int, VALUE._col1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -8408,10 +8045,16 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t3 a left semi join t1 b on 
a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8430,7 +8073,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -8439,6 +8089,11 @@ STAGE PLANS: keys: 0 key (type: int), value (type: string) 1 _col0 (type: int), _col1 (type: string) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -8446,22 +8101,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + 
Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -8470,18 +8160,53 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -8518,10 +8243,16 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: 
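A note on the reduce-sink specializations visible in the surrounding hunks: the vectorizer picks a sink class from the shuffle-key shape. A single int key gets VectorReduceSinkLongOperator, the compound (int, string) key of the two-column semijoin above gets VectorReduceSinkMultiKeyOperator, and a lone string key (later in this file) gets VectorReduceSinkStringOperator; when a native condition such as "Uniform Hash" fails, the plan falls back to the generic VectorReduceSinkOperator with native: false. A minimal repro sketch, assuming the t1/t3 test tables this q-file creates and only the settings named in the plans' own nativeConditionsMet lists:

    -- Hypothetical sketch: compare the sink class reported by EXPLAIN
    -- VECTORIZATION for a single-column key vs a compound key.
    set hive.vectorized.execution.enabled=true;
    set hive.vectorized.execution.reducesink.new.enabled=true;

    -- single int key: expect VectorReduceSinkLongOperator on the build side
    explain vectorization detail
    select * from t3 a left semi join t1 b on a.key = b.key;

    -- compound (int, string) key: expect VectorReduceSinkMultiKeyOperator
    explain vectorization detail
    select * from t3 a left semi join t1 b
      on a.key = b.key and a.value = b.value;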
Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8540,7 +8271,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -8551,6 +8289,11 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false outputColumnNames: _col0 input vertices: 1 Map 3 @@ -8559,22 +8302,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -8583,22 +8361,56 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No 
buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -8607,18 +8419,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: 
false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -8657,10 +8504,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8679,6 +8532,9 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Left Outer Join0 to 1 @@ -8687,6 +8543,11 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false outputColumnNames: _col0 input vertices: 1 Map 3 @@ -8695,31 +8556,82 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform 
Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -8728,18 +8640,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -8790,10 +8737,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c 
on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8812,35 +8765,88 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select 
Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -8849,9 +8855,26 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -8871,13 +8894,31 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -8931,10 +8972,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8953,23 +9000,56 @@ STAGE 
PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -8978,21 +9058,58 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, 
Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -9012,13 +9129,31 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -9072,10 +9207,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -9094,23 +9235,56 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: 
COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -9119,21 +9293,58 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap 
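Reducer 2 above stays in plain "llap" row mode — presumably it carries the shuffle-side join for the outer leg, whose operator tree the hunk skips — and the diff adds no Reduce Vectorization block for it, while Reducer 3, which only sorts the final keys, vectorizes under hive.vectorized.execution.reduce.enabled. A sketch of how to watch the sort reducer drop back to row mode as well, assuming the same test tables:

    -- Hypothetical sketch: with reduce-side vectorization off, Reducer 3
    -- should report "Execution mode: llap" instead of "vectorized, llap".
    set hive.vectorized.execution.reduce.enabled=false;
    explain vectorization detail
    select a.key from t3 a left semi join t2 b on a.key = b.key
    right outer join t1 c on a.key = c.key sort by a.key;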
Reduce Operator Tree: @@ -9153,13 +9364,31 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -9215,10 +9444,16 @@ NULL NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -9237,23 +9472,56 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + 
className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -9262,21 +9530,58 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -9296,13 +9601,31 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 
Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -9369,10 +9692,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -9391,7 +9720,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -9400,6 +9736,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -9410,6 +9751,11 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 value (type: string) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0 input vertices: 1 Map 4 @@ -9417,22 +9763,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -9441,30 +9822,85 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: value (type: string) sort order: + Map-reduce partition columns: value (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + 
vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -9524,10 +9960,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -9545,12 +9987,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0, val 100) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: ((key > 100) and value is not null) (type: boolean) Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -9558,12 +10011,20 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0 input vertices: 1 Map 2 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: 
Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -9571,19 +10032,49 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: value is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1 + native: false + projectedOutputColumns: [] keys: _col0 (type: string) mode: hash outputColumnNames: _col0 @@ -9592,9 +10083,26 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Stage: Stage-0 Fetch Operator @@ -9612,10 +10120,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: 
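From this query onward the expected plans switch to the fully native semijoin hash join: the Map Join Vectorization block below reports className: VectorMapJoinLeftSemiLongOperator with native: true, where the earlier sections fell back to the row-mode VectorMapJoinOperator because hive.vectorized.execution.mapjoin.native.enabled was still false. The change in expected output presumably corresponds to nothing more than this toggle in the q-file:

    -- Hypothetical sketch of the setting change behind the second half of
    -- this golden file: enable native vectorized mapjoin, re-run the explain.
    set hive.vectorized.execution.mapjoin.native.enabled=true;
    explain vectorization detail
    select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;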
Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -9634,7 +10148,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -9643,6 +10164,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -9650,22 +10179,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -9674,18 +10238,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -9714,10 +10313,16 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -9736,7 +10341,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -9745,6 +10357,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -9752,22 +10372,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -9776,18 +10431,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -9818,10 +10508,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -9840,7 +10536,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -9849,6 +10552,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -9856,22 +10567,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all 
inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -9880,18 +10626,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -9914,10 +10695,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 
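Each of these plans selects VectorMapJoinLeftSemiLongOperator because the single equi-join key is a long-family type and all of the listed native conditions hold. Conceptually, a native left semi join builds a set of distinct small-table keys and keeps only the big-table rows whose key is present. A simplified sketch of that probe side, using a plain java.util.HashSet where the real operator uses an optimized hash table (LeftSemiLongSketch and its method names are hypothetical):

    import java.util.HashSet;
    import java.util.Set;
    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

    public class LeftSemiLongSketch {
      private final Set<Long> smallTableKeys = new HashSet<>();

      // Build side: the small-table Group By Operator above has already
      // deduplicated the keys.
      public void addSmallTableKey(long key) {
        smallTableKeys.add(key);
      }

      // Probe side: narrow the batch to rows whose key matched the small table.
      public void filterBatch(VectorizedRowBatch batch, int keyCol) {
        LongColumnVector keys = (LongColumnVector) batch.cols[keyCol];
        int newSize = 0;
        for (int i = 0; i < batch.size; i++) {
          int row = batch.selectedInUse ? batch.selected[i] : i;
          int r = keys.isRepeating ? 0 : row;
          if (!keys.noNulls && keys.isNull[r]) {
            continue; // NULL keys never match a semi join
          }
          if (smallTableKeys.contains(keys.vector[r])) {
            batch.selected[newSize++] = row;
          }
        }
        batch.size = newSize;
        batch.selectedInUse = true;
      }
    }

Semi join semantics make this a pure filter on the big table: no small-table columns are projected, which is why projectedOutputColumns lists only big-table columns.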
POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: query: explain vectorization detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -9936,7 +10723,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -9945,6 +10739,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [1] + bigTableValueColumns: [1] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [1] outputColumnNames: _col1 input vertices: 1 Map 3 @@ -9952,26 +10754,65 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter 
Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 0, val 15) -> boolean predicate: (key < 15) (type: boolean) Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 0 + native: false + projectedOutputColumns: [] keys: _col1 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -9980,18 +10821,53 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -10025,10 +10901,16 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10047,7 +10929,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10056,6 +10945,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -10063,22 +10960,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterStringGroupColLessStringScalar(col 1, val val_10) -> boolean, SelectColumnIsNotNull(col 0) -> boolean) -> boolean predicate: ((value < 'val_10') and key is not null) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, 
_col1 @@ -10087,18 +11019,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -10124,10 +11091,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10146,14 +11119,31 @@ STAGE PLANS: TableScan alias: t3 Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 5) -> boolean predicate: (key > 5) (type: boolean) Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Select Operator 
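The predicateExpression annotations name the generated filter classes; for instance, FilterStringGroupColLessStringScalar(col 1, val val_10) in the plan above compares a bytes column against a constant without materializing Java Strings. A simplified analogue built on StringExpr.compare, assuming hive-exec on the classpath (StringLessScalarSketch is a hypothetical name; the real generated class handles additional batch states):

    import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;

    public class StringLessScalarSketch {
      // Keep only rows where the string column sorts before the scalar.
      public static void filter(VectorizedRowBatch batch, int col, byte[] scalar) {
        BytesColumnVector v = (BytesColumnVector) batch.cols[col];
        int newSize = 0;
        for (int i = 0; i < batch.size; i++) {
          int row = batch.selectedInUse ? batch.selected[i] : i;
          int r = v.isRepeating ? 0 : row;
          if (!v.noNulls && v.isNull[r]) {
            continue; // NULL never satisfies '<'
          }
          if (StringExpr.compare(v.vector[r], v.start[r], v.length[r],
              scalar, 0, scalar.length) < 0) {
            batch.selected[newSize++] = row;
          }
        }
        batch.size = newSize;
        batch.selectedInUse = true;
      }
    }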
expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -10162,15 +11152,39 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10179,6 +11193,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [1] + bigTableValueColumns: [1] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [1] outputColumnNames: _col1 input vertices: 1 Map 1 @@ -10186,22 +11208,62 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: 
COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -10227,10 +11289,16 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10249,14 +11317,31 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0, val 5) -> boolean, FilterStringGroupColLessEqualStringScalar(col 1, val val_20) -> boolean) -> boolean predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + 
projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -10265,15 +11350,39 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10282,6 +11391,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [1] + bigTableValueColumns: [1] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [1] outputColumnNames: _col1 input vertices: 1 Map 1 @@ -10289,22 +11406,62 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -10327,10 +11484,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10349,14 +11512,31 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (key > 2) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -10365,15 +11545,39 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT 
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10382,6 +11586,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 1 @@ -10389,18 +11601,54 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -10428,10 +11676,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10450,7 +11704,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10459,6 +11720,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0] outputColumnNames: _col0 input vertices: 1 Map 3 @@ -10466,22 +11735,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column 
stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -10490,18 +11794,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -10543,10 +11882,16 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10565,7 +11910,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10574,6 +11926,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 (2 * _col0) (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -10581,22 +11941,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2)(children: LongScalarMultiplyLongColumn(val 2, col 0) -> 2:long) -> boolean predicate: (2 * key) is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -10605,18 +12000,55 @@ STAGE PLANS: key expressions: (2 * _col0) (type: int) sort order: + Map-reduce partition 
columns: (2 * _col0) (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyExpressions: LongScalarMultiplyLongColumn(val 2, col 0) -> 1:long + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: bigint Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -10643,10 +12075,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10665,7 +12103,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10676,6 +12121,11 @@ STAGE PLANS: 0 
key (type: int) 1 key (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: One MapJoin Condition IS false outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 3 @@ -10684,43 +12134,107 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: string Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + 
rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -10729,18 +12243,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:int, VALUE._col1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -10779,10 +12328,16 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value 
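The plans above all lower a single-key LEFT SEMI JOIN the same way: the small table's keys are first deduplicated by a hash-mode Group By Operator (Map 3 / Map 4), and the big table is then filtered by a VectorMapJoinLeftSemi* probe, so a semi join can only drop big-table rows, never multiply them. A minimal sketch of that shape, using hypothetical class and method names rather than Hive's actual operators:

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

/**
 * Illustrative sketch only (hypothetical class, not Hive code): what the plans
 * above compute for "a LEFT SEMI JOIN b ON a.key = b.key".  The small table is
 * reduced to a distinct key set (the hash-mode Group By Operator), and the big
 * table is filtered by a containment probe (the VectorMapJoinLeftSemi* step).
 */
public final class LeftSemiJoinSketch {

  /** Dedup the small-table keys, like the Group By Operator with mode: hash. */
  static Set<Integer> buildDistinctKeys(int[] smallTableKeys) {
    Set<Integer> distinct = new HashSet<>();
    for (int key : smallTableKeys) {
      distinct.add(key);
    }
    return distinct;
  }

  /** Probe phase: each big-table row is kept or dropped, never duplicated. */
  static int[] probe(int[] bigTableKeys, Set<Integer> distinctKeys) {
    return Arrays.stream(bigTableKeys).filter(distinctKeys::contains).toArray();
  }

  public static void main(String[] args) {
    Set<Integer> keys = buildDistinctKeys(new int[] {4, 8, 8, 10});
    // Prints [4, 8, 8]: the semi join filters the left side; it does not
    // multiply rows even though 8 appears twice in the small table.
    System.out.println(Arrays.toString(probe(new int[] {0, 4, 8, 8, 9}, keys)));
  }
}

The key-type specialization visible in these plans (VectorMapJoinLeftSemiLongOperator for a single int key, VectorMapJoinLeftSemiMultiKeyOperator for the (int, string) pair) changes only how keys are hashed and serialized, not this filter-only semantics.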
+PREHOOK: query: explain vectorization detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10801,7 +12356,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10810,6 +12372,14 @@ STAGE PLANS: keys: 0 key (type: int), value (type: string) 1 _col0 (type: int), _col1 (type: string) + Map Join Vectorization: + bigTableKeyColumns: [0, 1] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinLeftSemiMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -10817,22 +12387,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: (key is not null and value is 
not null) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -10841,18 +12446,53 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -10889,10 +12529,16 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = 
b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10911,7 +12557,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10922,6 +12575,11 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: One MapJoin Condition IS false outputColumnNames: _col0 input vertices: 1 Map 3 @@ -10930,22 +12588,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -10954,22 +12647,56 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: 
VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -10978,18 +12705,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -11028,10 +12790,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -11050,6 +12818,9 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Left Outer Join0 to 1 @@ -11058,6 +12829,11 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: One MapJoin Condition IS false outputColumnNames: _col0 input vertices: 1 Map 3 @@ -11066,31 +12842,82 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator 
+ native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -11099,18 +12926,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE 
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -11161,10 +13023,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -11183,35 +13051,88 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c 
Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -11220,9 +13141,26 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -11242,13 +13180,31 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -11302,10 +13258,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key 
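A recurring pattern in these diffs is the split between native and non-native reduce sinks: a key of a single supported type gets a specialized class (VectorReduceSinkLongOperator, VectorReduceSinkStringOperator, VectorReduceSinkMultiKeyOperator) with native: true, while the final sort-by sinks fall back to the generic VectorReduceSinkOperator because "Uniform Hash IS false" — a plain sort by supplies no partition columns to hash on. A small self-contained sketch (hypothetical helper, not Hive code) of how such a checklist could be evaluated and rendered into the nativeConditionsMet / nativeConditionsNotMet lines seen above:

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Illustrative sketch only (hypothetical helper, not Hive code): deriving the
 * nativeConditionsMet / nativeConditionsNotMet lines from a named checklist.
 * An operator is native only when every condition holds; each "... IS false"
 * entry explains a fallback to the non-native operator class.
 */
public final class NativeConditionsSketch {

  /** Collect the conditions whose value matches wantMet, as "name IS value". */
  static List<String> render(Map<String, Boolean> conditions, boolean wantMet) {
    List<String> out = new ArrayList<>();
    for (Map.Entry<String, Boolean> e : conditions.entrySet()) {
      if (e.getValue() == wantMet) {
        out.add(e.getKey() + " IS " + e.getValue());
      }
    }
    return out;
  }

  public static void main(String[] args) {
    Map<String, Boolean> c = new LinkedHashMap<>();
    c.put("hive.vectorized.execution.reducesink.new.enabled", true);
    c.put("No TopN", true);
    c.put("Uniform Hash", false); // "sort by" alone has no partition keys
    boolean isNative = !c.containsValue(false);
    System.out.println("native: " + isNative);                   // native: false
    System.out.println("nativeConditionsMet: " + render(c, true));
    System.out.println("nativeConditionsNotMet: " + render(c, false));
  }
}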
POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -11324,23 +13286,56 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -11349,21 +13344,58 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition 
columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -11383,13 +13415,31 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -11443,10 +13493,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -11465,23 +13521,56 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, 
Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -11490,21 +13579,58 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -11524,13 +13650,31 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -11586,10 +13730,16 @@ NULL NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -11608,23 +13758,56 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator 
Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -11633,21 +13816,58 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -11667,13 +13887,31 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: 
KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -11740,10 +13978,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -11762,7 +14006,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -11771,6 +14022,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -11781,6 +14040,14 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 value (type: string) + Map Join Vectorization: + bigTableKeyColumns: [1] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinOuterStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0] outputColumnNames: _col0 input vertices: 1 Map 4 @@ -11788,22 +14055,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
         Map 3 
             Map Operator Tree:
                 TableScan
                   alias: b
                   Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0]
                       Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            vectorOutput: true
+                            keyExpressions: col 0
+                            native: false
+                            projectedOutputColumns: []
                         keys: _col0 (type: int)
                         mode: hash
                         outputColumnNames: _col0
@@ -11812,30 +14114,85 @@ STAGE PLANS:
                           key expressions: _col0 (type: int)
                           sort order: +
                           Map-reduce partition columns: _col0 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkLongOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
         Map 4 
             Map Operator Tree:
                 TableScan
                   alias: c
                   Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Reduce Output Operator
                     key expressions: value (type: string)
                     sort order: +
                     Map-reduce partition columns: value (type: string)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkStringOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [1]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY.reducesinkkey0:int
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                 Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -11895,10 +14252,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+PREHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+POSTHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -11916,12 +14279,23 @@ STAGE PLANS:
                 TableScan
                   alias: a
                   Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0, val 100) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean
                     predicate: ((key > 100) and value is not null) (type: boolean)
                     Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1]
                       Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
@@ -11929,12 +14303,23 @@ STAGE PLANS:
                         keys:
                           0 _col1 (type: string)
                           1 _col0 (type: string)
+                        Map Join Vectorization:
+                            bigTableKeyColumns: [1]
+                            bigTableRetainedColumns: [0]
+                            bigTableValueColumns: [0]
+                            className: VectorMapJoinLeftSemiStringOperator
+                            native: true
+                            nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                            projectedOutputColumns: [0]
                         outputColumnNames: _col0
                         input vertices:
                           1 Map 2
                         Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
+                          File Sink Vectorization:
+                              className: VectorFileSinkOperator
+                              native: false
                           Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -11942,19 +14327,49 @@ STAGE PLANS:
                               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
         Map 2 
             Map Operator Tree:
                 TableScan
                   alias: b
                   Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 1) -> boolean
                     predicate: value is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: value (type: string)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [1]
                       Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            vectorOutput: true
+                            keyExpressions: col 1
+                            native: false
+                            projectedOutputColumns: []
                         keys: _col0 (type: string)
                         mode: hash
                         outputColumnNames: _col0
@@ -11963,9 +14378,26 @@ STAGE PLANS:
                           key expressions: _col0 (type: string)
                           sort order: +
                           Map-reduce partition columns: _col0 (type: string)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkStringOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [1]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
   Stage: Stage-0
     Fetch Operator
@@ -11983,10 +14415,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
 #### A masked pattern was here ####
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -12005,7 +14443,14 @@ STAGE PLANS:
                 TableScan
                   alias: a
                   Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
@@ -12014,6 +14459,14 @@ STAGE PLANS:
                       keys:
                         0 key (type: int)
                         1 _col0 (type: int)
+                      Map Join Vectorization:
+                          bigTableKeyColumns: [0]
+                          bigTableRetainedColumns: [0, 1]
+                          bigTableValueColumns: [0, 1]
+                          className: VectorMapJoinLeftSemiLongOperator
+                          native: true
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          projectedOutputColumns: [0, 1]
                       outputColumnNames: _col0, _col1
                       input vertices:
                         1 Map 3
@@ -12021,22 +14474,57 @@ STAGE PLANS:
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: string)
                         sort order: ++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
         Map 3 
             Map Operator Tree:
                 TableScan
                   alias: b
                   Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0]
                       Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            vectorOutput: true
+                            keyExpressions: col 0
+                            native: false
+                            projectedOutputColumns: []
                         keys: _col0 (type: int)
                         mode: hash
                         outputColumnNames: _col0
@@ -12045,18 +14533,53 @@ STAGE PLANS:
                           key expressions: _col0 (type: int)
                           sort order: +
                          Map-reduce partition columns: _col0 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkLongOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
        Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
                 outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1]
                 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -12085,10 +14608,16 @@ POSTHOOK: Input: default@t2
 10	val_10
 4	val_4
 8	val_8
-PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -12107,7 +14636,14 @@ STAGE PLANS:
                 TableScan
                   alias: a
                   Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
@@ -12116,6 +14652,14 @@ STAGE PLANS:
                       keys:
                         0 key (type: int)
                         1 _col0 (type: int)
+                      Map Join Vectorization:
+                          bigTableKeyColumns: [0]
+                          bigTableRetainedColumns: [0, 1]
+                          bigTableValueColumns: [0, 1]
+                          className: VectorMapJoinLeftSemiLongOperator
+                          native: true
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          projectedOutputColumns: [0, 1]
                       outputColumnNames: _col0, _col1
                       input vertices:
                         1 Map 3
@@ -12123,22 +14667,57 @@ STAGE PLANS:
                      Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: string)
                         sort order: ++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
         Map 3 
             Map Operator Tree:
                 TableScan
                   alias: b
                   Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0]
                       Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            vectorOutput: true
+                            keyExpressions: col 0
+                            native: false
+                            projectedOutputColumns: []
                         keys: _col0 (type: int)
                         mode: hash
                         outputColumnNames: _col0
@@ -12147,18 +14726,53 @@ STAGE PLANS:
                           key expressions: _col0 (type: int)
                           sort order: +
                           Map-reduce partition columns: _col0 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkLongOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
        Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
                 outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1]
                 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -12189,10 +14803,16 @@ POSTHOOK: Input: default@t2
 10	val_5
 4	val_2
 8	val_4
-PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -12211,7 +14831,14 @@ STAGE PLANS:
                 TableScan
                   alias: a
                   Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
@@ -12220,6 +14847,14 @@ STAGE PLANS:
                       keys:
                         0 key (type: int)
                         1 _col0 (type: int)
+                      Map Join Vectorization:
+                          bigTableKeyColumns: [0]
+                          bigTableRetainedColumns: [0, 1]
+                          bigTableValueColumns: [0, 1]
+                          className: VectorMapJoinLeftSemiLongOperator
+                          native: true
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          projectedOutputColumns: [0, 1]
                       outputColumnNames: _col0, _col1
                       input vertices:
                         1 Map 3
@@ -12227,22 +14862,57 @@ STAGE PLANS:
                      Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: string)
                         sort order: ++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
         Map 3 
             Map Operator Tree:
                 TableScan
                   alias: b
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0]
                       Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                       Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            vectorOutput: true
+                            keyExpressions: col 0
+                            native: false
+                            projectedOutputColumns: []
                         keys: _col0 (type: int)
                         mode: hash
                         outputColumnNames: _col0
@@ -12251,18 +14921,53 @@ STAGE PLANS:
                           key expressions: _col0 (type: int)
                           sort order: +
                           Map-reduce partition columns: _col0 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkLongOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
        Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
+                    partitionColumnCount: 0
             Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1]
                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -12285,10 +14990,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t4
 #### A masked pattern was here ####
-PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+PREHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+POSTHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -12307,7 +15018,14 @@ STAGE PLANS:
                 TableScan
                   alias: a
                   Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
@@ -12316,6 +15034,14 @@ STAGE PLANS:
                       keys:
                         0 key (type: int)
                         1 _col1 (type: int)
+                      Map Join Vectorization:
+                          bigTableKeyColumns: [0]
+                          bigTableRetainedColumns: [1]
+                          bigTableValueColumns: [1]
+                          className: VectorMapJoinLeftSemiLongOperator
+                          native: true
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          projectedOutputColumns: [1]
                       outputColumnNames: _col1
                       input vertices:
                         1 Map 3
@@ -12323,26 +15049,65 @@ STAGE PLANS:
                      Select Operator
                        expressions: _col1 (type: string)
                        outputColumnNames: _col0
+                        Select Vectorization:
+                            className: VectorSelectOperator
+                            native: true
+                            projectedOutputColumns: [1]
                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                        Reduce Output Operator
                          key expressions: _col0 (type: string)
                          sort order: +
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkOperator
+                              native: false
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              nativeConditionsNotMet: Uniform Hash IS false
                          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
         Map 3 
             Map Operator Tree:
                 TableScan
                   alias: b
                   Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterLongColLessLongScalar(col 0, val 15) -> boolean
                     predicate: (key < 15) (type: boolean)
                     Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0]
                       Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            vectorOutput: true
+                            keyExpressions: col 0, col 0
+                            native: false
+                            projectedOutputColumns: []
                         keys: _col1 (type: int), _col1 (type: int)
                         mode: hash
                         outputColumnNames: _col0, _col1
@@ -12351,18 +15116,53 @@ STAGE PLANS:
                           key expressions: _col1 (type: int)
                           sort order: +
                           Map-reduce partition columns: _col1 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkLongOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
        Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY.reducesinkkey0:string
+                    partitionColumnCount: 0
             Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string)
                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -12396,10 +15196,16 @@ val_5
 val_5
 val_8
 val_9
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -12418,7 +15224,14 @@ STAGE PLANS:
                 TableScan
                   alias: a
                   Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
@@ -12427,6 +15240,14 @@ STAGE PLANS:
                       keys:
                         0 key (type: int)
                         1 _col0 (type: int)
+                      Map Join Vectorization:
+                          bigTableKeyColumns: [0]
+                          bigTableRetainedColumns: [0, 1]
+                          bigTableValueColumns: [0, 1]
+                          className: VectorMapJoinLeftSemiLongOperator
+                          native: true
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          projectedOutputColumns: [0, 1]
                       outputColumnNames: _col0, _col1
                      input vertices:
                        1 Map 3
@@ -12434,22 +15255,57 @@ STAGE PLANS:
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: string)
                        sort order: ++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
        Map 3 
            Map Operator Tree:
                TableScan
                  alias: b
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterStringGroupColLessStringScalar(col 1, val val_10) -> boolean, SelectColumnIsNotNull(col 0) -> boolean) -> boolean
                    predicate: ((value < 'val_10') and key is not null) (type: boolean)
                    Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: int), value (type: string)
                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1]
                      Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            vectorOutput: true
+                            keyExpressions: col 0, col 1
+                            native: false
+                            projectedOutputColumns: []
                        keys: _col0 (type: int), _col1 (type: string)
                        mode: hash
                        outputColumnNames: _col0, _col1
@@ -12458,18 +15314,53 @@ STAGE PLANS:
                          key expressions: _col0 (type: int)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkLongOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                          Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
+                    partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1]
                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -12495,10 +15386,16 @@ POSTHOOK: Input: default@t2
 0	val_0
 0	val_0
 0	val_0
-PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+PREHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+POSTHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -12517,14 +15414,31 @@ STAGE PLANS:
                TableScan
                  alias: t3
                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterLongColGreaterLongScalar(col 0, val 5) -> boolean
                    predicate: (key > 5) (type: boolean)
                    Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0]
                      Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            vectorOutput: true
+                            keyExpressions: col 0
+                            native: false
+                            projectedOutputColumns: []
                        keys: _col0 (type: int)
                        mode: hash
                        outputColumnNames: _col0
@@ -12533,15 +15447,39 @@ STAGE PLANS:
                          key expressions: _col0 (type: int)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkLongOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                          Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
        Map 2 
            Map Operator Tree:
                TableScan
                  alias: a
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -12550,6 +15488,14 @@ STAGE PLANS:
                      keys:
                        0 key (type: int)
                        1 _col0 (type: int)
+                      Map Join Vectorization:
+                          bigTableKeyColumns: [0]
+                          bigTableRetainedColumns: [1]
+                          bigTableValueColumns: [1]
+                          className: VectorMapJoinLeftSemiLongOperator
+                          native: true
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          projectedOutputColumns: [1]
                      outputColumnNames: _col1
                      input vertices:
                        1 Map 1
@@ -12557,22 +15503,62 @@ STAGE PLANS:
                      Select Operator
                        expressions: _col1 (type: string)
                        outputColumnNames: _col0
+                        Select Vectorization:
+                            className: VectorSelectOperator
+                            native: true
+                            projectedOutputColumns: [1]
                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                        Reduce Output Operator
                          key expressions: _col0 (type: string)
                          sort order: +
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkOperator
+                              native: false
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              nativeConditionsNotMet: Uniform Hash IS false
                          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
        Reducer 3 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY.reducesinkkey0:string
+                    partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string)
                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -12598,10 +15584,16 @@ POSTHOOK: Input: default@t3
 val_10
 val_8
 val_9
-PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+PREHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+POSTHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -12620,14 +15612,31 @@ STAGE PLANS:
                TableScan
                  alias: t2
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0, val 5) -> boolean, FilterStringGroupColLessEqualStringScalar(col 1, val val_20) -> boolean) -> boolean
                    predicate: ((key > 5) and (value <= 'val_20')) (type: boolean)
                    Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: int), value (type: string)
                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1]
                      Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            vectorOutput: true
+                            keyExpressions: col 0, col 1
+                            native: false
+                            projectedOutputColumns: []
                        keys: _col0 (type: int), _col1 (type: string)
                        mode: hash
                        outputColumnNames: _col0, _col1
@@ -12636,15 +15645,39 @@ STAGE PLANS:
                          key expressions: _col0 (type: int)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkLongOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                          Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
        Map 2 
            Map Operator Tree:
                TableScan
                  alias: a
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -12653,6 +15686,14 @@ STAGE PLANS:
                      keys:
                        0 key (type: int)
                        1 _col0 (type: int)
+                      Map Join Vectorization:
+                          bigTableKeyColumns: [0]
+                          bigTableRetainedColumns: [1]
+                          bigTableValueColumns: [1]
+                          className: VectorMapJoinLeftSemiLongOperator
+                          native: true
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          projectedOutputColumns: [1]
                      outputColumnNames: _col1
                      input vertices:
                        1 Map 1
@@ -12660,22 +15701,62 @@ STAGE PLANS:
                      Select Operator
                        expressions: _col1 (type: string)
                        outputColumnNames: _col0
+                        Select Vectorization:
+                            className: VectorSelectOperator
+                            native: true
+                            projectedOutputColumns: [1]
                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                        Reduce Output Operator
                          key expressions: _col0 (type: string)
                          sort order: +
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkOperator
+                              native: false
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              nativeConditionsNotMet: Uniform Hash IS false
                          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
        Reducer 3 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY.reducesinkkey0:string
+                    partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string)
                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -12698,10 +15779,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
 #### A masked pattern was here ####
-PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -12720,14 +15807,31 @@ STAGE PLANS:
                TableScan
                  alias: t1
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
                    predicate: (key > 2) (type: boolean)
                    Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0]
                      Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            vectorOutput: true
+                            keyExpressions: col 0
+                            native: false
+                            projectedOutputColumns: []
                        keys: _col0 (type: int)
                        mode: hash
                        outputColumnNames: _col0
@@ -12736,15 +15840,39 @@ STAGE PLANS:
                          key expressions: _col0 (type: int)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkLongOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                          Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
        Map 2 
            Map Operator Tree:
                TableScan
                  alias: a
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -12753,6 +15881,14 @@ STAGE PLANS:
                      keys:
                        0 key (type: int)
                        1 _col0 (type: int)
+                      Map Join Vectorization:
+                          bigTableKeyColumns: [0]
+                          bigTableRetainedColumns: [0, 1]
+                          bigTableValueColumns: [0, 1]
+                          className: VectorMapJoinLeftSemiLongOperator
+                          native: true
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          projectedOutputColumns: [0, 1]
                      outputColumnNames: _col0, _col1
                      input vertices:
                        1 Map 1
@@ -12760,18 +15896,54 @@ STAGE PLANS:
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: string)
                        sort order: ++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
        Reducer 3 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
+                    partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1]
                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -12799,10 +15971,16 @@ POSTHOOK: Input: default@t2
 10	val_5
 4	val_2
 8	val_4
-PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
+PREHOOK: query: explain vectorization detail
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
+POSTHOOK: query: explain vectorization detail
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -12821,7 +15999,14 @@ STAGE PLANS:
                TableScan
                  alias: a
                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -12830,6 +16015,14 @@ STAGE PLANS:
                      keys:
                        0 key (type: int)
                        1 _col0 (type: int)
+                      Map Join Vectorization:
+                          bigTableKeyColumns: [0]
+                          bigTableRetainedColumns: [0]
+                          bigTableValueColumns: [0]
+                          className: VectorMapJoinLeftSemiLongOperator
+                          native: true
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          projectedOutputColumns: [0]
                      outputColumnNames: _col0
                      input vertices:
                        1 Map 3
@@ -12837,22 +16030,57 @@ STAGE PLANS:
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
        Map 3 
            Map Operator Tree:
                TableScan
                  alias: b
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0]
                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            vectorOutput: true
+                            keyExpressions: col 0
+                            native: false
+                            projectedOutputColumns: []
                        keys: _col0 (type: int)
                        mode: hash
                        outputColumnNames: _col0
@@ -12861,18 +16089,53 @@ STAGE PLANS:
                          key expressions: _col0 (type: int)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkLongOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY.reducesinkkey0:int
+                    partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int)
                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -12914,10 +16177,16 @@ POSTHOOK: Input: default@t3
 8
 8
 9
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -12936,7 +16205,14 @@ STAGE PLANS:
                TableScan
                  alias: a
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -12945,6 +16221,14 @@ STAGE PLANS:
                      keys:
                        0 key (type: int)
                        1 (2 * _col0) (type: int)
+                      Map Join Vectorization:
+                          bigTableKeyColumns: [0]
+                          bigTableRetainedColumns: [0, 1]
+                          bigTableValueColumns: [0, 1]
+                          className: VectorMapJoinLeftSemiLongOperator
+                          native: true
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          projectedOutputColumns: [0, 1]
                      outputColumnNames: _col0, _col1
                      input vertices:
                        1 Map 3
@@ -12952,22 +16236,57 @@ STAGE PLANS:
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: string)
                        sort order: ++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
        Map 3 
            Map Operator Tree:
                TableScan
                  alias: b
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 2)(children: LongScalarMultiplyLongColumn(val 2, col 0) -> 2:long) -> boolean
                    predicate: (2 * key) is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: 
int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -12976,18 +16295,55 @@ STAGE PLANS: key expressions: (2 * _col0) (type: int) sort order: + Map-reduce partition columns: (2 * _col0) (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyExpressions: LongScalarMultiplyLongColumn(val 2, col 0) -> 1:long + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: bigint Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -13014,10 +16370,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on 
stages: Stage-1 @@ -13036,7 +16398,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -13047,6 +16416,11 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: One MapJoin Condition IS false outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 3 @@ -13055,43 +16429,107 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: string Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -13100,18 +16538,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:int, VALUE._col1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num 
rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -13150,10 +16623,16 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -13172,7 +16651,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -13181,6 +16667,14 @@ STAGE PLANS: keys: 0 key (type: int), value (type: string) 1 _col0 (type: int), _col1 (type: string) + Map Join Vectorization: + bigTableKeyColumns: [0, 1] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinLeftSemiMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -13188,22 +16682,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 
+ includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -13212,18 +16741,53 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -13260,10 +16824,16 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left 
semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -13282,7 +16852,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -13293,6 +16870,11 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: One MapJoin Condition IS false outputColumnNames: _col0 input vertices: 1 Map 3 @@ -13301,22 +16883,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + 
className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -13325,22 +16942,56 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -13349,18 +17000,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: 
vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -13399,10 +17085,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -13421,6 +17113,9 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Left Outer Join0 to 1 @@ -13429,6 +17124,11 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: One MapJoin Condition IS false outputColumnNames: _col0 input vertices: 1 Map 3 @@ -13437,31 +17137,82 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false 
+ usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -13470,18 +17221,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 
+ dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -13532,10 +17318,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -13554,35 +17346,88 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data 
size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -13591,9 +17436,26 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -13613,13 +17475,31 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -13673,10 +17553,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 
b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -13695,23 +17581,56 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -13720,21 +17639,58 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -13754,13 +17710,31 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -13814,10 +17788,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 
@@ -13836,23 +17816,56 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -13861,21 +17874,58 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
         Reducer 2
             Execution mode: llap
             Reduce Operator Tree:
@@ -13895,13 +17945,31 @@ STAGE PLANS:
                 Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
         Reducer 3
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY.reducesinkkey0:int
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                 Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -13957,10 +18025,16 @@
 NULL
 NULL
 NULL
 NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -13979,23 +18053,56 @@ STAGE PLANS:
                 TableScan
                   alias: a
                   Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Reduce Output Operator
                     key expressions: key (type: int)
                     sort order: +
                     Map-reduce partition columns: key (type: int)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkLongOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
         Map 4
             Map Operator Tree:
                 TableScan
                   alias: b
                   Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: key (type: int)
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0]
                     Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 0
+                          native: false
+                          projectedOutputColumns: []
                       keys: _col0 (type: int)
                       mode: hash
                       outputColumnNames: _col0
@@ -14004,21 +18111,58 @@
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
         Map 5
             Map Operator Tree:
                 TableScan
                   alias: c
                   Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Reduce Output Operator
                     key expressions: key (type: int)
                     sort order: +
                     Map-reduce partition columns: key (type: int)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkLongOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
         Reducer 2
             Execution mode: llap
             Reduce Operator Tree:
@@ -14038,13 +18182,31 @@ STAGE PLANS:
                 Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
         Reducer 3
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY.reducesinkkey0:int
+                    partitionColumnCount: 0
             Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int)
                outputColumnNames: _col0
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0]
                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -14111,10 +18273,16 @@ POSTHOOK: Input: default@t3
 NULL
 NULL
 NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
+PREHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
+POSTHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -14133,7 +18301,14 @@ STAGE PLANS:
                 TableScan
                   alias: a
                   Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
@@ -14142,6 +18317,14 @@ STAGE PLANS:
                       keys:
                         0 key (type: int)
                         1 _col0 (type: int)
+                      Map Join Vectorization:
+                          bigTableKeyColumns: [0]
+                          bigTableRetainedColumns: [0, 1]
+                          bigTableValueColumns: [0, 1]
+                          className: VectorMapJoinLeftSemiLongOperator
+                          native: true
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          projectedOutputColumns: [0, 1]
                       outputColumnNames: _col0, _col1
                       input vertices:
                         1 Map 3
@@ -14152,6 +18335,14 @@ STAGE PLANS:
                       keys:
                         0 _col1 (type: string)
                         1 value (type: string)
+                      Map Join Vectorization:
+                          bigTableKeyColumns: [1]
+                          bigTableRetainedColumns: [0]
+                          bigTableValueColumns: [0]
+                          className: VectorMapJoinOuterStringOperator
+                          native: true
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          projectedOutputColumns: [0]
                       outputColumnNames: _col0
                       input vertices:
                         1 Map 4
@@ -14159,22 +18350,57 @@
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
         Map 3
             Map Operator Tree:
                 TableScan
                   alias: b
                   Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0]
                       Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            vectorOutput: true
+                            keyExpressions: col 0
+                            native: false
+                            projectedOutputColumns: []
                         keys: _col0 (type: int)
                         mode: hash
                         outputColumnNames: _col0
@@ -14183,30 +18409,85 @@
                           key expressions: _col0 (type: int)
                           sort order: +
                           Map-reduce partition columns: _col0 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkLongOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
         Map 4
             Map Operator Tree:
                 TableScan
                   alias: c
                   Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Reduce Output Operator
                     key expressions: value (type: string)
                     sort order: +
                     Map-reduce partition columns: value (type: string)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkStringOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [1]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
         Reducer 2
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY.reducesinkkey0:int
+                    partitionColumnCount: 0
             Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int)
                outputColumnNames: _col0
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0]
               Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
+                File Sink Vectorization:
+                    className: VectorFileSinkOperator
+                    native: false
                 Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
                 table:
                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -14266,10 +18547,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+PREHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+POSTHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -14287,12 +18574,23 @@ STAGE PLANS:
                 TableScan
                   alias: a
                   Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0, val 100) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean
                     predicate: ((key > 100) and value is not null) (type: boolean)
                     Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1]
                       Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
@@ -14300,12 +18598,23 @@
                         keys:
                           0 _col1 (type: string)
                           1 _col0 (type: string)
+                        Map Join Vectorization:
+                            bigTableKeyColumns: [1]
+                            bigTableRetainedColumns: [0]
+                            bigTableValueColumns: [0]
+                            className: VectorMapJoinLeftSemiStringOperator
+                            native: true
+                            nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                            projectedOutputColumns: [0]
                         outputColumnNames: _col0
                         input vertices:
                           1 Map 2
                         Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
+                          File Sink Vectorization:
+                              className: VectorFileSinkOperator
+                              native: false
                           Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -14313,19 +18622,49 @@
                               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
         Map 2
             Map Operator Tree:
                 TableScan
                   alias: b
                   Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 1) -> boolean
                     predicate: value is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: value (type: string)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [1]
                       Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            vectorOutput: true
+                            keyExpressions: col 1
+                            native: false
+                            projectedOutputColumns: []
                         keys: _col0 (type: string)
                         mode: hash
                         outputColumnNames: _col0
@@ -14334,9 +18673,26 @@
                           key expressions: _col0 (type: string)
                           sort order: +
                           Map-reduce partition columns: _col0 (type: string)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkStringOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [1]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
  Stage: Stage-0
    Fetch Operator
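Throughout these golden files every native-capable operator reports a checklist: each requirement lands in nativeConditionsMet or nativeConditionsNotMet, and the operator is native only when nothing is unmet (compare the VectorReduceSinkOperator above, whose only failure is Uniform Hash IS false for the sort-by). A minimal illustrative sketch of that reporting pattern follows; the class and method names are invented for illustration and are not Hive's actual Vectorizer code.

import java.util.ArrayList;
import java.util.List;

// Illustrative only: mirrors the "<name> IS <value>" condition reporting
// format seen in the golden output above.
public class NativeConditionChecker {
  private final List<String> met = new ArrayList<>();
  private final List<String> notMet = new ArrayList<>();

  void check(String name, boolean value) {
    // record the condition in the same "X IS true/false" style as the plan
    (value ? met : notMet).add(name + " IS " + value);
  }

  boolean isNative() {
    return notMet.isEmpty();
  }

  public static void main(String[] args) {
    NativeConditionChecker c = new NativeConditionChecker();
    c.check("hive.vectorized.execution.reducesink.new.enabled", true);
    c.check("Uniform Hash", false); // e.g. sort-by without partition keys
    System.out.println("native: " + c.isNative());
    System.out.println("nativeConditionsMet: " + String.join(", ", c.met));
    System.out.println("nativeConditionsNotMet: " + String.join(", ", c.notMet));
  }
}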
diff --git ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
index d0efdb0..dfae461 100644
--- ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
+++ ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
@@ -1,15 +1,19 @@
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR')
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR')
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -32,6 +36,10 @@ STAGE PLANS:
                     predicate: l_partkey is not null (type: boolean)
                     Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
+                      Group By Vectorization:
+                          vectorOutput: false
+                          native: false
+                          projectedOutputColumns: null
                       keys: l_partkey (type: int)
                       mode: hash
                       outputColumnNames: _col0
@@ -43,6 +51,10 @@
                         Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 3
             Map Operator Tree:
                 TableScan
@@ -63,6 +75,10 @@
                       value expressions: _col0 (type: int), _col2 (type: int)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 4
             Map Operator Tree:
                 TableScan
@@ -76,6 +92,10 @@
                     outputColumnNames: l_orderkey
                     Statistics: Num rows: 14 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
+                      Group By Vectorization:
+                          vectorOutput: false
+                          native: false
+                          projectedOutputColumns: null
                       keys: l_orderkey (type: int)
                       mode: hash
                       outputColumnNames: _col0
@@ -87,10 +107,27 @@
                         Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Reducer 2
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
+               Group By Vectorization:
+                   className: VectorGroupByOperator
+                   vectorOutput: true
+                   keyExpressions: col 0
+                   native: false
+                   projectedOutputColumns: []
               keys: KEY._col0 (type: int)
               mode: mergepartial
               outputColumnNames: _col0
@@ -101,6 +138,10 @@
                 keys:
                   0 _col0 (type: int)
                   1 _col1 (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinInnerLongOperator
+                    native: true
+                    nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
                 outputColumnNames: _col0, _col1, _col3
                 input vertices:
                   1 Map 3
@@ -111,6 +152,10 @@
                   keys:
                     0 _col1 (type: int)
                     1 _col0 (type: int)
+                  Map Join Vectorization:
+                      className: VectorMapJoinInnerBigOnlyLongOperator
+                      native: true
+                      nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
                   outputColumnNames: _col0, _col3
                   input vertices:
                     1 Reducer 5
@@ -118,9 +163,16 @@
                   Select Operator
                     expressions: _col0 (type: int), _col3 (type: int)
                     outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 2]
                     Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -128,8 +180,21 @@
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 5
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
+               Group By Vectorization:
+                   className: VectorGroupByOperator
+                   vectorOutput: true
+                   keyExpressions: col 0
+                   native: false
+                   projectedOutputColumns: []
               keys: KEY._col0 (type: int)
               mode: mergepartial
               outputColumnNames: _col0
@@ -138,6 +203,10 @@
                 key expressions: _col0 (type: int)
                 sort order: +
                 Map-reduce partition columns: _col0 (type: int)
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkLongOperator
+                    native: true
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                 Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
  Stage: Stage-0
@@ -170,18 +239,22 @@ POSTHOOK: Input: default@lineitem
 61336 8855
 64128 9141
 82704 7721
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -207,6 +280,10 @@
                     predicate: l_partkey is not null (type: boolean)
                     Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
+                      Group By Vectorization:
+                          vectorOutput: false
+                          native: false
+                          projectedOutputColumns: null
                       keys: l_partkey (type: int)
                       mode: hash
                       outputColumnNames: _col0
@@ -218,6 +295,10 @@
                         Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 3
             Map Operator Tree:
                 TableScan
@@ -238,6 +319,10 @@
                       value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 4
             Map Operator Tree:
                 TableScan
@@ -258,6 +343,10 @@
                       value expressions: _col0 (type: int)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 5
             Map Operator Tree:
                 TableScan
@@ -267,6 +356,10 @@
                     predicate: l_partkey is not null (type: boolean)
                     Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
+                      Group By Vectorization:
+                          vectorOutput: false
+                          native: false
+                          projectedOutputColumns: null
                       keys: l_partkey (type: int)
                       mode: hash
                       outputColumnNames: _col0
@@ -278,6 +371,10 @@
                         Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 7
             Map Operator Tree:
                 TableScan
@@ -301,6 +398,10 @@
                       0 Reducer 6
                     Statistics: Num rows: 34 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
+                      Group By Vectorization:
+                          vectorOutput: false
+                          native: false
+                          projectedOutputColumns: null
                       keys: _col2 (type: int)
                       mode: hash
                       outputColumnNames: _col0
@@ -312,10 +413,27 @@
                         Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Reducer 2
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
+               Group By Vectorization:
+                   className: VectorGroupByOperator
+                   vectorOutput: true
+                   keyExpressions: col 0
+                   native: false
+                   projectedOutputColumns: []
               keys: KEY._col0 (type: int)
               mode: mergepartial
               outputColumnNames: _col0
@@ -326,6 +444,10 @@
                 keys:
                   0 _col0 (type: int)
                   1 _col1 (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinInnerLongOperator
+                    native: true
+                    nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
                 outputColumnNames: _col0, _col1, _col3, _col4
                 input vertices:
                   1 Map 3
@@ -334,12 +456,29 @@
                   key expressions: _col1 (type: int), _col4 (type: int)
                   sort order: ++
                   Map-reduce partition columns: _col1 (type: int), _col4 (type: int)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkMultiKeyOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                   Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: int), _col3 (type: int)
         Reducer 6
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
+               Group By Vectorization:
+                   className: VectorGroupByOperator
+                   vectorOutput: true
+                   keyExpressions: col 0
+                   native: false
+                   projectedOutputColumns: []
               keys: KEY._col0 (type: int)
               mode: mergepartial
               outputColumnNames: _col0
@@ -348,11 +487,28 @@
                 key expressions: _col0 (type: int)
                 sort order: +
                 Map-reduce partition columns: _col0 (type: int)
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkLongOperator
+                    native: true
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                 Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 8
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
+               Group By Vectorization:
+                   className: VectorGroupByOperator
+                   vectorOutput: true
+                   keyExpressions: col 0
+                   native: false
+                   projectedOutputColumns: []
               keys: KEY._col0 (type: int)
               mode: mergepartial
               outputColumnNames: _col0
@@ -363,11 +519,21 @@
                 keys:
                   0 _col1 (type: int)
                   1 _col0 (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinInnerLongOperator
+                    native: true
+                    nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
                 outputColumnNames: _col0, _col3
                 input vertices:
                   0 Map 4
                 Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
+                  Group By Vectorization:
+                      className: VectorGroupByOperator
+                      vectorOutput: true
+                      keyExpressions: col 1, col 0
+                      native: false
+                      projectedOutputColumns: []
                   keys: _col0 (type: int), _col3 (type: int)
                   mode: hash
                   outputColumnNames: _col0, _col1
@@ -376,11 +542,28 @@
                     key expressions: _col0 (type: int), _col1 (type: int)
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkMultiKeyOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 9
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
+               Group By Vectorization:
+                   className: VectorGroupByOperator
+                   vectorOutput: true
+                   keyExpressions: col 0, col 1
+                   native: false
+                   projectedOutputColumns: []
               keys: KEY._col0 (type: int), KEY._col1 (type: int)
               mode: mergepartial
               outputColumnNames: _col0, _col1
@@ -391,6 +574,10 @@
                 keys:
                   0 _col1 (type: int), _col4 (type: int)
                   1 _col0 (type: int), _col1 (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinInnerMultiKeyOperator
+                    native: true
+                    nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
                 outputColumnNames: _col0, _col3
                 input vertices:
                   0 Reducer 2
@@ -398,9 +585,16 @@
                 Select Operator
                   expressions: _col0 (type: int), _col3 (type: int)
                   outputColumnNames: _col0, _col1
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [2, 3]
                   Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
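The plan above picks a different operator class for each map join: VectorMapJoinInnerLongOperator, VectorMapJoinInnerBigOnlyLongOperator, and VectorMapJoinInnerMultiKeyOperator. Those names decompose into a join variant plus a key kind, as the following naming sketch illustrates; the class and its methods are invented for illustration, and the real selection logic lives in Hive's Vectorizer.

// Illustrative only: derives the specialized operator names seen in this
// file from join variant + key kind; not Hive's actual factory code.
public class MapJoinClassNamer {
  enum KeyKind { LONG, STRING, MULTI_KEY }

  static String className(String joinVariant, KeyKind key) {
    String keyPart;
    switch (key) {
      case LONG:   keyPart = "Long"; break;
      case STRING: keyPart = "String"; break;
      default:     keyPart = "MultiKey"; break;
    }
    return "VectorMapJoin" + joinVariant + keyPart + "Operator";
  }

  public static void main(String[] args) {
    System.out.println(className("Inner", KeyKind.LONG));        // single long join key
    System.out.println(className("InnerBigOnly", KeyKind.LONG)); // small table carries keys only
    System.out.println(className("Inner", KeyKind.MULTI_KEY));   // composite key
  }
}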
diff --git ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
index 1960c0c..f66a0c4 100644
--- ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
+++ ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
@@ -208,7 +208,7 @@ stored as orc
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@store
-PREHOOK: query: explain select
+PREHOOK: query: explain vectorization select
     s_state, count(1)
 from store_sales,
      store,
@@ -220,7 +220,7 @@ PREHOOK: query: explain select
 order by s_state
 limit 100
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select
+POSTHOOK: query: explain vectorization select
     s_state, count(1)
 from store_sales,
      store,
@@ -232,6 +232,10 @@ POSTHOOK: query: explain select
 order by s_state
 limit 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -267,6 +271,12 @@
                       value expressions: _col0 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: unknown
+            Map Vectorization:
+                enabled: true
+                groupByVectorOutput: true
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 6
             Map Operator Tree:
                 TableScan
@@ -287,6 +297,14 @@
                       value expressions: _col1 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 7
             Map Operator Tree:
                 TableScan
@@ -306,6 +324,14 @@
                     Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
             Execution mode: llap
             Reduce Operator Tree:
@@ -353,6 +379,13 @@
                   value expressions: _col1 (type: bigint)
         Reducer 4
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
               aggregations: count(VALUE._col0)
@@ -368,6 +401,13 @@
                 value expressions: _col1 (type: bigint)
         Reducer 5
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Select Operator
               expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
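This file exercises the bare "explain vectorization" form, which reports only per-vertex summaries, while the other golden files in this patch use the "expression" and "detail" forms that add operator and rowBatchContext annotations. A sketch of driving all three variants over JDBC follows; the connection URL and the query are placeholders for a local HiveServer2, not part of the patch.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

// Sketch: print the EXPLAIN VECTORIZATION output variants seen in these
// golden files. Assumes the Hive JDBC driver is on the classpath.
public class ExplainVectorization {
  public static void main(String[] args) throws Exception {
    try (Connection conn =
             DriverManager.getConnection("jdbc:hive2://localhost:10000/default");
         Statement stmt = conn.createStatement()) {
      // summary only: PLAN/Map/Reduce Vectorization sections
      dump(stmt, "explain vectorization select count(1) from store");
      // adds per-operator blocks, as in vector_mapjoin_reduce.q.out
      dump(stmt, "explain vectorization expression select count(1) from store");
      // adds rowBatchContext detail, as in the semijoin output earlier
      dump(stmt, "explain vectorization detail select count(1) from store");
    }
  }

  static void dump(Statement stmt, String sql) throws Exception {
    try (ResultSet rs = stmt.executeQuery(sql)) {
      while (rs.next()) {
        System.out.println(rs.getString(1));
      }
    }
  }
}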
diff --git ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out
index 469c702..d537297 100644
--- ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out
+++ ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out
@@ -65,16 +65,20 @@
 POSTHOOK: query: analyze table orc1 compute statistics
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc1
 POSTHOOK: Output: default@orc1
-PREHOOK: query: explain from orc1 a
+PREHOOK: query: explain vectorization from orc1 a
 insert overwrite table orc_rn1 select a.* where a.rn < 100
 insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
 insert overwrite table orc_rn3 select a.* where a.rn >= 1000
 PREHOOK: type: QUERY
-POSTHOOK: query: explain from orc1 a
+POSTHOOK: query: explain vectorization from orc1 a
 insert overwrite table orc_rn1 select a.* where a.rn < 100
 insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
 insert overwrite table orc_rn3 select a.* where a.rn >= 1000
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-3 is a root stage
   Stage-4 depends on stages: Stage-3
@@ -142,6 +146,14 @@
                       name: default.orc_rn3
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
  Stage: Stage-4
    Dependency Collection
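Note that the multi-insert vertex above reports vectorized: true but allNative: false: the summary is a roll-up over the vertex's operators, and file sinks vectorize without being native. A small sketch of that roll-up follows; the class and map contents are illustrative, not Hive code.

import java.util.LinkedHashMap;
import java.util.Map;

// Sketch of the allNative roll-up implied by the output above: a vertex is
// "vectorized" when every operator vectorizes, but "allNative" only when
// every operator also runs natively (VectorFileSinkOperator does not).
public class VertexSummary {
  public static void main(String[] args) {
    Map<String, Boolean> operatorIsNative = new LinkedHashMap<>();
    operatorIsNative.put("VectorSelectOperator", true);
    operatorIsNative.put("VectorFilterOperator", true);
    operatorIsNative.put("VectorFileSinkOperator", false); // sink reports native: false

    boolean allNative =
        operatorIsNative.values().stream().allMatch(Boolean::booleanValue);
    System.out.println("vectorized: true");
    System.out.println("allNative: " + allNative); // false, matching the plan
  }
}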
diff --git ql/src/test/results/clientpositive/llap/vector_non_constant_in_expr.q.out ql/src/test/results/clientpositive/llap/vector_non_constant_in_expr.q.out
new file mode 100644
index 0000000..6edc474
--- /dev/null
+++ ql/src/test/results/clientpositive/llap/vector_non_constant_in_expr.q.out
@@ -0,0 +1,51 @@
+PREHOOK: query: explain vectorization SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint)
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (cint) IN (ctinyint, cbigint) (type: boolean)
+                    Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+                      Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Predicate expression for FILTER operator: Cannot vectorize IN() - casting a column is not supported. Column type is int but the common type is bigint
+                vectorized: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
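The notVectorizedReason above fires because the IN() operands force a common comparison type (bigint) different from the column's own type (int), and the vectorizer refuses to insert a per-row column cast. An illustrative version of that guard follows, with simplified type names and widening rules; it is not Hive's actual implementation.

import java.util.Arrays;
import java.util.List;

// Illustrative IN() guard behind the notVectorizedReason above.
public class InExpressionGuard {
  static final List<String> WIDENING =
      Arrays.asList("tinyint", "smallint", "int", "bigint");

  // widest integer type among the operands wins (simplified rule)
  static String commonType(List<String> types) {
    String widest = "tinyint";
    for (String t : types) {
      if (WIDENING.indexOf(t) > WIDENING.indexOf(widest)) {
        widest = t;
      }
    }
    return widest;
  }

  // returns null when vectorization is allowed, else the refusal reason
  static String tryVectorizeIn(String columnType, List<String> operandTypes) {
    String common = commonType(operandTypes);
    if (!common.equals(columnType)) {
      return "Cannot vectorize IN() - casting a column is not supported. "
          + "Column type is " + columnType + " but the common type is " + common;
    }
    return null;
  }

  public static void main(String[] args) {
    // cint in (ctinyint, cbigint): common type bigint != int -> not vectorized
    System.out.println(tryVectorizeIn("int", Arrays.asList("int", "tinyint", "bigint")));
  }
}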
diff --git ql/src/test/results/clientpositive/llap/vector_null_projection.q.out ql/src/test/results/clientpositive/llap/vector_null_projection.q.out
index a4a36e0..84266a2 100644
--- ql/src/test/results/clientpositive/llap/vector_null_projection.q.out
+++ ql/src/test/results/clientpositive/llap/vector_null_projection.q.out
@@ -28,12 +28,16 @@
 POSTHOOK: query: insert into table b values('aaa')
 POSTHOOK: type: QUERY
 POSTHOOK: Output: default@b
 POSTHOOK: Lineage: b.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select NULL from a
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select NULL from a
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -61,6 +65,12 @@
                               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: Data type void of Const void null not supported
+                vectorized: false
  Stage: Stage-0
    Fetch Operator
@@ -77,12 +87,16 @@
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@a
#### A masked pattern was here ####
 NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select NULL as x from a union distinct select NULL as x from b
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select NULL as x from a union distinct select NULL as x from b
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -118,6 +132,12 @@
                         Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Key expression for GROUPBY operator: Data type void of Const void null not supported
+                vectorized: false
         Map 4
             Map Operator Tree:
                 TableScan
@@ -139,8 +159,19 @@
                         Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Key expression for GROUPBY operator: Data type void of Const void null not supported
+                vectorized: false
         Reducer 3
             Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Key expression for GROUPBY operator: Data type void of Column[KEY._col0] not supported
+                vectorized: false
             Reduce Operator Tree:
              Group By Operator
               keys: KEY._col0 (type: void)
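Each vertex summary above is effectively a three-field record: enabled, vectorized, and an optional notVectorizedReason. A vertex can be enabled (the feature is on and its preconditions hold) yet still fall back to row mode, as with the unsupported void constant, and the reason records the first failed check. The following sketch models that record; the class and its fields are illustrative, not Hive's ExplainTask structures.

// Sketch of the enabled / notVectorizedReason / vectorized triple printed
// for each vertex above; illustrative names only.
public class VertexVectorizationSummary {
  final boolean enabled;
  String notVectorizedReason; // stays null while vectorization is still possible

  VertexVectorizationSummary(boolean enabled) {
    this.enabled = enabled;
  }

  void checkTypeSupported(String expr, String type) {
    // first failed check wins; later checks do not overwrite the reason
    if (notVectorizedReason == null && "void".equals(type)) {
      notVectorizedReason = "Key expression for GROUPBY operator: Data type "
          + type + " of " + expr + " not supported";
    }
  }

  boolean vectorized() {
    return enabled && notVectorizedReason == null;
  }

  public static void main(String[] args) {
    VertexVectorizationSummary s = new VertexVectorizationSummary(true);
    s.checkTypeSupported("Column[KEY._col0]", "void");
    System.out.println("enabled: " + s.enabled);
    System.out.println("notVectorizedReason: " + s.notVectorizedReason);
    System.out.println("vectorized: " + s.vectorized());
  }
}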
diff --git ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out
index fba6f18..2781dab 100644
--- ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out
+++ ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out
@@ -26,10 +26,14 @@
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@myinput1
 POSTHOOK: Lineage: myinput1.key SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:key, type:int, comment:null), ]
 POSTHOOK: Lineage: myinput1.value SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:value, type:int, comment:null), ]
-PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value
+PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value
+POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -47,12 +51,20 @@ STAGE PLANS:
                 TableScan
                   alias: a
                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Map Join Operator
                     condition map:
                          Inner Join 0 to 1
                     keys:
                       0 key (type: int)
                       1 value (type: int)
+                    Map Join Vectorization:
+                        className: VectorMapJoinOperator
+                        native: false
+                        nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                        nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, No nullsafe IS false
                     nullSafes: [true]
                     outputColumnNames: _col0, _col1, _col5, _col6
                     input vertices:
@@ -61,9 +73,16 @@
                     Select Operator
                       expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int)
                       outputColumnNames: _col0, _col1, _col2, _col3
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1, 2, 3]
                       Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
                         Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -71,19 +90,42 @@
                             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 2
             Map Operator Tree:
                 TableScan
                   alias: b
                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Reduce Output Operator
                    key expressions: value (type: int)
                    sort order: +
                    Map-reduce partition columns: value (type: int)
+                   Reduce Sink Vectorization:
+                       className: VectorReduceSinkLongOperator
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                    value expressions: key (type: int)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
  Stage: Stage-0
    Fetch Operator
@@ -110,10 +152,14 @@
 NULL 35 NULL NULL
 NULL NULL 10 NULL
 NULL NULL 48 NULL
 NULL NULL NULL NULL
-PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key
+PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key
+POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -131,7 +177,14 @@ STAGE PLANS:
                 TableScan
                   alias: a
                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
@@ -142,6 +195,11 @@
                       0 key (type: int)
                       1 value (type: int)
                       2 key (type: int)
+                      Map Join Vectorization:
+                          className: VectorMapJoinOperator
+                          native: false
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false
                       outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11
                       input vertices:
                         1 Map 2
@@ -150,9 +208,16 @@
                       Select Operator
                         expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int)
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                        Select Vectorization:
+                            className: VectorSelectOperator
+                            native: true
+                            projectedOutputColumns: [0, 1, 2, 3, 4, 5]
                         Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
+                          File Sink Vectorization:
+                              className: VectorFileSinkOperator
+                              native: false
                           Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -160,38 +225,84 @@
                               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 2
             Map Operator Tree:
                 TableScan
                   alias: b
                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 1) -> boolean
                     predicate: value is not null (type: boolean)
                     Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: value (type: int)
                       sort order: +
                       Map-reduce partition columns: value (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkLongOperator
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                       value expressions: key (type: int)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 3
            Map Operator Tree:
                TableScan
                  alias: c
                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: key (type: int)
                      sort order: +
                      Map-reduce partition columns: key (type: int)
+                     Reduce Sink Vectorization:
+                         className: VectorReduceSinkLongOperator
+                         native: true
+                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                      Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                      value expressions: value (type: int)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: true
+               usesVectorUDFAdaptor: false
+               vectorized: true
  Stage: Stage-0
    Fetch Operator
@@ -209,10 +320,14 @@ POSTHOOK: Input: default@myinput1
#### A masked pattern was here ####
 10 NULL NULL 10 10 NULL
 100 100 100 100 100 100
-PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key
+PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key
+POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -230,6 +345,9 @@
                 TableScan
                   alias: a
                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Map Join Operator
                     condition map:
                          Inner Join 0 to 1
@@ -238,6 +356,11 @@
                       0 key (type: int)
                       1 value (type: int)
                       2 key (type: int)
+                      Map Join Vectorization:
+                          className: VectorMapJoinOperator
+                          native: false
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false, No nullsafe IS false
                     nullSafes: [true]
                     outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11
                     input vertices:
@@ -247,9 +370,16 @@
                     Select Operator
                       expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1, 2, 3, 4, 5]
                       Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
                         Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -257,32 +387,70 @@
                             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 2
             Map Operator Tree:
                 TableScan
                   alias: b
                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Reduce Output Operator
                     key expressions: value (type: int)
                     sort order: +
                     Map-reduce partition columns: value (type: int)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkLongOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                    value expressions: key (type: int)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3
             Map Operator Tree:
                 TableScan
                   alias: c
                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Reduce Output Operator
                     key expressions: key (type: int)
                     sort order: +
                     Map-reduce partition columns: key (type: int)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkLongOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                     value expressions: value (type: int)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
  Stage: Stage-0
    Fetch Operator
@@ -327,10 +495,14 @@
 NULL NULL 48 NULL NULL NULL
 NULL NULL NULL NULL NULL 10
 NULL NULL NULL NULL NULL 35
 NULL NULL NULL NULL NULL NULL
-PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value
+PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value
+POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -348,7 +520,14 @@ STAGE PLANS:
                 TableScan
                   alias: a
                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 1) -> boolean
                     predicate: value is not null (type: boolean)
                     Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
@@ -359,6 +538,11 @@
                       0 key (type: int), value (type: int)
                       1 value (type: int), key (type: int)
                       2 key (type: int), value (type: int)
+                      Map Join Vectorization:
+                          className: VectorMapJoinOperator
+                          native: false
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false, No nullsafe IS false
                     nullSafes: [true, false]
                     outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11
                     input vertices:
@@ -368,9 +552,16 @@
                     Select Operator
                       expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1, 2, 3, 4, 5]
                       Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
                         Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -378,36 +569,82 @@
                             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 2
             Map Operator Tree:
                 TableScan
                   alias: b
                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: value (type: int), key (type: int)
                       sort order: ++
                       Map-reduce partition columns: value (type: int), key (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkMultiKeyOperator
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3
             Map Operator Tree:
                 TableScan
                   alias: c
                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 1) -> boolean
                     predicate: value is not null (type: boolean)
                     Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: key (type: int), value (type: int)
                       sort order: ++
                       Map-reduce partition columns: key (type: int), value (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkMultiKeyOperator
+                          native: true
+                          nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -425,10 +662,14 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 100 100 100 100 100 100 NULL 10 10 NULL NULL 10 -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -446,6 +687,9 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 @@ -454,6 +698,11 @@ STAGE PLANS: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) 2 key (type: int), value (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true, true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: @@ -463,9 +712,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -473,30 +729,68 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP 
IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: value (type: int), key (type: int) sort order: ++ Map-reduce partition columns: value (type: int), key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int), value (type: int) sort order: ++ Map-reduce partition columns: key (type: int), value (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -615,10 +909,14 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -636,12 +934,20 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan 
Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 keys: 0 key (type: int) 1 value (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: No nullsafe IS false nullSafes: [true] outputColumnNames: _col0, _col1, _col5, _col6 input vertices: @@ -650,9 +956,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -660,19 +973,42 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: value (type: int) sort order: + Map-reduce partition columns: value (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -699,10 +1035,14 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key 
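Every golden file in this patch swaps the bare `explain` for `explain vectorization expression`, the EXPLAIN mode whose output these tests pin down. For orientation, a sketch of the syntax as these files exercise it (the bracketed level keywords are drawn from the Hive EXPLAIN VECTORIZATION grammar rather than spelled out in this diff):

    -- EXPLAIN VECTORIZATION [ONLY] [SUMMARY | OPERATOR | EXPRESSION | DETAIL] <query>
    -- The EXPRESSION level used throughout these tests adds the per-operator
    -- "... Vectorization:" blocks and prints concrete vector expressions,
    -- e.g. SelectColumnIsNotNull(col 0) -> boolean.
    EXPLAIN VECTORIZATION EXPRESSION
    SELECT * FROM myinput1 a JOIN myinput1 b ON a.key <=> b.value;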
POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -720,7 +1060,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -731,6 +1078,11 @@ STAGE PLANS: 0 key (type: int) 1 value (type: int) 2 key (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: One MapJoin Condition IS false outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: 1 Map 2 @@ -739,9 +1091,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -749,38 +1108,84 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: value is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: value (type: int) sort order: + Map-reduce partition columns: value (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) Execution 
mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -798,10 +1203,14 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 10 NULL NULL 10 10 NULL 100 100 100 100 100 100 -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -819,6 +1228,9 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 @@ -827,6 +1239,11 @@ STAGE PLANS: 0 key (type: int) 1 value (type: int) 2 key (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: @@ -836,9 
+1253,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -846,32 +1270,70 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: value (type: int) sort order: + Map-reduce partition columns: value (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -916,10 +1378,14 @@ NULL NULL 48 NULL NULL NULL NULL 
NULL NULL NULL NULL 10 NULL NULL NULL NULL NULL 35 NULL NULL NULL NULL NULL NULL -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -937,7 +1403,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: value is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -948,6 +1421,11 @@ STAGE PLANS: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) 2 key (type: int), value (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true, false] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: @@ -957,9 +1435,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -967,36 +1452,82 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + 
native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: value (type: int), key (type: int) sort order: ++ Map-reduce partition columns: value (type: int), key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: value is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int), value (type: int) sort order: ++ Map-reduce partition columns: key (type: int), value (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1014,10 +1545,14 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 100 100 100 100 100 100 NULL 10 10 NULL NULL 10 -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value POSTHOOK: type: QUERY +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1035,6 +1570,9 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 @@ -1043,6 +1581,11 @@ STAGE PLANS: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) 2 key (type: int), value (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true, true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: @@ -1052,9 +1595,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1062,30 +1612,68 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: value (type: int), key (type: int) sort order: ++ Map-reduce partition columns: value (type: int), key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan 
Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int), value (type: int) sort order: ++ Map-reduce partition columns: key (type: int), value (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out index c89eb11..e94151d 100644 --- ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out +++ ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out @@ -94,17 +94,21 @@ POSTHOOK: Lineage: vectortab2k_orc.i SIMPLE [(scratch)scratch.FieldSchema(name:i POSTHOOK: Lineage: vectortab2k_orc.si SIMPLE [(scratch)scratch.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: vectortab2k_orc.t SIMPLE [(scratch)scratch.FieldSchema(name:t, type:tinyint, comment:null), ] scratch.t scratch.si scratch.i scratch.b scratch.f scratch.d scratch.dc -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT sum(hash(*)) FROM (SELECT t, si, i, (t < 0) as compare1, (si <= 0) as compare2, (i = 0) as compare3 from vectortab2k_orc order by t, si, i) as q PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT sum(hash(*)) FROM (SELECT t, si, i, (t < 0) as compare1, (si <= 0) as compare2, (i = 0) as compare3 from vectortab2k_orc order by t, si, i) as q POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -122,31 +126,73 @@ STAGE PLANS: TableScan alias: vectortab2k_orc Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), (t < 0) (type: boolean), (si <= 0) (type: boolean), (i = 0) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 7, 8, 9] + selectExpressions: LongColLessLongScalar(col 0, val 0) -> 7:long, LongColLessEqualLongScalar(col 1, val 0) -> 8:long, LongColEqualLongScalar(col 2, val 0) -> 9:long Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) sort order: +++ + Reduce Sink Vectorization: + className: 
VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reduce Operator Tree: Select Operator expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0,VALUE._col1,VALUE._col2) (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [6] + selectExpressions: VectorUDFAdaptor(hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0,VALUE._col1,VALUE._col2)) -> 6:int Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 6) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: complete outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -173,17 +219,21 @@ POSTHOOK: Input: default@vectortab2k_orc #### A masked pattern was here #### _c0 -3601806268 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT sum(hash(*)) FROM (SELECT t, si, i, b, (t > 0) as compare1, (si >= 0) as compare2, (i != 0) as compare3, (b > 0) as compare4 from vectortab2k_orc order by t, si, i, b) as q PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT sum(hash(*)) FROM (SELECT t, si, i, b, (t > 0) as compare1, (si >= 0) as compare2, (i != 0) as compare3, (b > 0) as compare4 from vectortab2k_orc order by t, si, i, b) as q POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -201,31 +251,73 @@ STAGE PLANS: TableScan alias: vectortab2k_orc Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), (t > 0) (type: 
boolean), (si >= 0) (type: boolean), (i <> 0) (type: boolean), (b > 0) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 7, 8, 9, 10] + selectExpressions: LongColGreaterLongScalar(col 0, val 0) -> 7:long, LongColGreaterEqualLongScalar(col 1, val 0) -> 8:long, LongColNotEqualLongScalar(col 2, val 0) -> 9:long, LongColGreaterLongScalar(col 3, val 0) -> 10:long Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint) sort order: ++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reduce Operator Tree: Select Operator expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0,VALUE._col1,VALUE._col2,VALUE._col3) (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8] + selectExpressions: VectorUDFAdaptor(hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0,VALUE._col1,VALUE._col2,VALUE._col3)) -> 8:int Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 8) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: complete outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_nvl.q.out ql/src/test/results/clientpositive/llap/vector_nvl.q.out index b926ab4b..aa8ed4a 100644 --- ql/src/test/results/clientpositive/llap/vector_nvl.q.out +++ ql/src/test/results/clientpositive/llap/vector_nvl.q.out @@ -1,31 +1,82 @@ -PREHOOK: query: EXPLAIN SELECT cdouble, nvl(cdouble, 100) as 
n +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, nvl(cdouble, 100) as n FROM alltypesorc WHERE (cdouble IS NULL) LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdouble, nvl(cdouble, 100) as n +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, nvl(cdouble, 100) as n FROM alltypesorc WHERE (cdouble IS NULL) LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNull(col 5) -> boolean + predicate: cdouble is null (type: boolean) + Statistics: Num rows: 3114 Data size: 18608 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: null (type: double), 100.0 (type: double) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 13] + selectExpressions: ConstantVectorExpression(val null) -> 12:double, ConstantVectorExpression(val 100.0) -> 13:double + Statistics: Num rows: 3114 Data size: 24920 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: cdouble is null (type: boolean) - Select Operator - expressions: null (type: double), 100.0 (type: double) - outputColumnNames: _col0, _col1 - Limit - Number of rows: 10 - ListSink + ListSink PREHOOK: query: SELECT cdouble, nvl(cdouble, 100) as n FROM alltypesorc @@ -51,30 +102,76 @@ NULL 100.0 NULL 100.0 NULL 100.0 NULL 100.0 -PREHOOK: query: EXPLAIN SELECT cfloat, nvl(cfloat, 1) as n +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, nvl(cfloat, 1) as n FROM alltypesorc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cfloat, nvl(cfloat, 1) as n +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, nvl(cfloat, 1) as n FROM alltypesorc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE 
DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Select Operator + expressions: cfloat (type: float), NVL(cfloat,1) (type: float) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 13] + selectExpressions: VectorCoalesce(columns [4, 12])(children: col 4, ConstantVectorExpression(val 1.0) -> 12:double) -> 13:float + Statistics: Num rows: 12288 Data size: 85848 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: alltypesorc - Select Operator - expressions: cfloat (type: float), NVL(cfloat,1) (type: float) - outputColumnNames: _col0, _col1 - Limit - Number of rows: 10 - ListSink + ListSink PREHOOK: query: SELECT cfloat, nvl(cfloat, 1) as n FROM alltypesorc @@ -98,30 +195,76 @@ NULL 1.0 27.0 27.0 -11.0 -11.0 61.0 61.0 -PREHOOK: query: EXPLAIN SELECT nvl(null, 10) as n +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT nvl(null, 10) as n FROM alltypesorc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT nvl(null, 10) as n +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT nvl(null, 10) as n FROM alltypesorc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Select Operator + expressions: 10 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12] + selectExpressions: ConstantVectorExpression(val 10) -> 12:long + Statistics: Num rows: 12288 Data size: 49152 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Limit Vectorization: + className: 
VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: alltypesorc - Select Operator - expressions: 10 (type: int) - outputColumnNames: _col0 - Limit - Number of rows: 10 - ListSink + ListSink PREHOOK: query: SELECT nvl(null, 10) as n FROM alltypesorc @@ -145,30 +288,60 @@ POSTHOOK: Input: default@alltypesorc 10 10 10 -PREHOOK: query: EXPLAIN SELECT nvl(null, null) as n +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT nvl(null, null) as n FROM alltypesorc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT nvl(null, null) as n +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT nvl(null, null) as n FROM alltypesorc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: null (type: void) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type void of Const void null not supported + vectorized: false + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: alltypesorc - Select Operator - expressions: null (type: void) - outputColumnNames: _col0 - Limit - Number of rows: 10 - ListSink + ListSink PREHOOK: query: SELECT nvl(null, null) as n FROM alltypesorc diff --git ql/src/test/results/clientpositive/llap/vector_orc_string_reader_empty_dict.q.out ql/src/test/results/clientpositive/llap/vector_orc_string_reader_empty_dict.q.out new file mode 100644 index 0000000..4f00bed --- /dev/null +++ 
ql/src/test/results/clientpositive/llap/vector_orc_string_reader_empty_dict.q.out @@ -0,0 +1,62 @@ +PREHOOK: query: create table orcstr (vcol varchar(20)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orcstr +POSTHOOK: query: create table orcstr (vcol varchar(20)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orcstr +PREHOOK: query: insert overwrite table orcstr select null from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@orcstr +POSTHOOK: query: insert overwrite table orcstr select null from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@orcstr +POSTHOOK: Lineage: orcstr.vcol EXPRESSION [] +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### +NULL +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### +NULL +PREHOOK: query: insert overwrite table orcstr select "" from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@orcstr +POSTHOOK: query: insert overwrite table orcstr select "" from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@orcstr +POSTHOOK: Lineage: orcstr.vcol EXPRESSION [] +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### + +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### + diff --git ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out index 503cf5b..2073b22 100644 --- ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select bo, max(b) from vectortab2korc group by bo order by bo desc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select bo, max(b) from vectortab2korc group by bo order by bo desc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 
depends on stages: Stage-1 @@ -125,12 +129,26 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: bo (type: boolean), b (type: bigint) outputColumnNames: bo, b + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [7, 3] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(b) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 7 + native: false + projectedOutputColumns: [0] keys: bo (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -139,15 +157,41 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 @@ -155,17 +199,36 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: boolean) sort order: - + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: 
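
The Group By Vectorization entries above show max(b) being evaluated by VectorUDAFMaxLong over the bigint column vector for b. The following is a minimal standalone Java sketch of that folding logic; the class and method names are illustrative, not Hive's actual aggregator, and it assumes the column-vector layout (a values array plus a null mask, a noNulls flag, and an isRepeating flag) that these plans refer to.

    // Illustrative sketch only -- not Hive's VectorUDAFMaxLong.
    final class VectorMaxLongSketch {
        static long maxOfBatch(long[] vector, boolean[] isNull, boolean noNulls,
                               boolean isRepeating, int n) {
            if (isRepeating) {
                // A repeating vector stores one physical value for the whole batch.
                return (noNulls || !isNull[0]) ? vector[0] : Long.MIN_VALUE;
            }
            long max = Long.MIN_VALUE;
            for (int i = 0; i < n; i++) {
                if (noNulls || !isNull[i]) {   // null rows never participate
                    if (vector[i] > max) {
                        max = vector[i];
                    }
                }
            }
            return max;
        }
    }
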
Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out index 3f6aca2..fa36090 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out @@ -58,12 +58,16 @@ POSTHOOK: Input: default@orc_table_2 4 FOUR NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -81,9 +85,16 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -91,12 +102,25 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [1] + bigTableOuterKeyMapping: 1 -> 2 + bigTableRetainedColumns: [0, 1, 2] + bigTableValueColumns: [0, 1] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0, 1, 2, 3] + smallTableMapping: [3] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -104,23 +128,61 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, string Map 2 Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 Stage: Stage-0 Fetch Operator @@ -144,12 +206,16 @@ one 1 NULL NULL one 1 NULL NULL three 3 3 THREE two 2 2 TWO -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -167,26 +233,57 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE 
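
The rowBatchContext entries in these plans describe how each vectorized row batch is laid out: the table's data columns come first, then any partition columns, then scratch columns that expressions and the outer-join operators write into. Below is a small sketch using Hive's public vector classes; the batch mirrors the dataColumns and scratchColumnTypeNames listing shown above (v1:string, a:int plus bigint and string scratch), but the comments on what the scratch columns hold are assumptions for illustration.

    import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

    public class RowBatchContextSketch {
        public static VectorizedRowBatch build() {
            // 2 data columns + 2 scratch columns, as in the rowBatchContext above.
            VectorizedRowBatch batch = new VectorizedRowBatch(4);
            batch.cols[0] = new BytesColumnVector();  // v1:string (data column 0)
            batch.cols[1] = new LongColumnVector();   // a:int (integers ride in long vectors)
            batch.cols[2] = new LongColumnVector();   // scratch bigint (assumed: outer-join key copy)
            batch.cols[3] = new BytesColumnVector();  // scratch string (assumed: small-table value)
            return batch;
        }
    }
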
value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -194,12 +291,25 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableOuterKeyMapping: 0 -> 3 + bigTableRetainedColumns: [0, 1, 3] + bigTableValueColumns: [0, 1] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [2, 3, 0, 1] + smallTableMapping: [2] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -207,6 +317,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: string, bigint Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out index c7cadf1..2ea8c8f 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out @@ -214,18 +214,22 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from 
small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -243,9 +247,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 15 Data size: 3697 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Statistics: Num rows: 15 Data size: 3697 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -253,12 +264,25 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col2 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [2] + bigTableOuterKeyMapping: 2 -> 14 + bigTableRetainedColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 14] + bigTableValueColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] + smallTableMapping: [12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 input vertices: 1 Map 2 Statistics: Num rows: 32 Data size: 19648 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 32 Data size: 19648 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -266,23 +290,61 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, 
ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint, bigint, double, double, string, string, timestamp, timestamp, bigint, bigint Map 2 Map Operator Tree: TableScan alias: cd Statistics: Num rows: 15 Data size: 3697 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Statistics: Num rows: 15 Data size: 3697 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 15 Data size: 3697 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Stage: Stage-0 Fetch Operator @@ -323,18 +385,22 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: 
explain +PREHOOK: query: explain vectorization detail select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -352,9 +418,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -362,12 +435,23 @@ STAGE PLANS: keys: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0] outputColumnNames: _col0 input vertices: 1 Map 2 Statistics: Num rows: 112 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 112 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -375,22 +459,59 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: hd Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: 
VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Stage: Stage-0 Fetch Operator @@ -517,7 +638,7 @@ NULL NULL NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -526,7 +647,7 @@ left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -535,6 +656,10 @@ left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -553,9 +678,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 15 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint), cint (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2] Statistics: Num rows: 15 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -563,6 +695,14 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [2] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0] outputColumnNames: _col0 input vertices: 1 Map 3 @@ -573,63 +713,163 @@ STAGE PLANS: keys: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0] outputColumnNames: _col0 input vertices: 1 Map 4 Statistics: Num rows: 240 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: cd Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + 
includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: hd Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out index a58ce8e..2077dce 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out @@ -224,7 +224,7 @@ NULL NULL -850295959 -1887561756 NULL NULL 
WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -233,7 +233,7 @@ left outer join small_alltypesorc_a hd on hd.cbigint = c.cbigint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -242,6 +242,10 @@ left outer join small_alltypesorc_a hd on hd.cbigint = c.cbigint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -260,9 +264,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 20 Data size: 132 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cint (type: int), cbigint (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3] Statistics: Num rows: 20 Data size: 132 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -270,6 +281,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [2] + bigTableRetainedColumns: [3] + bigTableValueColumns: [3] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [3] outputColumnNames: _col1 input vertices: 1 Map 3 @@ -280,63 +299,163 @@ STAGE PLANS: keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: [3] + bigTableRetainedColumns: [3] + bigTableValueColumns: [3] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [3] outputColumnNames: _col1 input vertices: 1 Map 4 Statistics: Num rows: 162 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE 
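
The hash-mode Group By in the plan below pairs VectorUDAFCountStar with VectorUDAFSumLong so both aggregates are computed in a single pass over each batch. A hedged standalone sketch of that pattern follows (the names are illustrative, not Hive's classes): count(*) needs only the batch row count, while the sum must honor the selection vector and skip null rows.

    // Illustrative sketch only -- not Hive's VectorUDAFCountStar/VectorUDAFSumLong.
    final class CountSumSketch {
        long count;
        long sum;

        void aggregateBatch(long[] vector, boolean[] isNull, boolean noNulls,
                            int[] selected, boolean selectedInUse, int n) {
            count += n;  // count(*) counts rows, so no column access is needed
            for (int j = 0; j < n; j++) {
                int i = selectedInUse ? selected[j] : j;  // map through the selection vector
                if (noNulls || !isNull[i]) {
                    sum += vector[i];  // sum skips null rows
                }
            }
        }
    }
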
Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2, 3] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: cd Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: hd Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cbigint (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key 
expressions: _col0 (type: bigint) sort order: + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [3] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out index a34cb8d..dbbfd34 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out @@ -224,7 +224,7 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -233,7 +233,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from 
(select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -242,112 +242,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez #### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int), cstring1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 40 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 4 - Statistics: Num rows: 80 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor 
Tree: - ListSink - PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -369,7 +264,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a #### A masked pattern was here #### 20 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -378,7 +273,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -387,112 +282,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 2939 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring1 (type: string), cstring2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 2939 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 21 Data size: 1869 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 4 - Statistics: Num rows: 42 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring2 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Execution 
mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -514,7 +304,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a #### A masked pattern was here #### 28 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -523,7 +313,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -532,112 +322,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: bigint), _col3 (type: string) - 1 _col0 (type: bigint), _col1 (type: string) - outputColumnNames: _col0, _col2 - input vertices: - 1 Map 3 - Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int), _col2 (type: string) - 1 _col0 (type: int), _col1 (type: string) - input vertices: - 1 Map 4 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 2120 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cbigint (type: bigint), cstring2 (type: string) - outputColumnNames: _col0, 
_col1 - Statistics: Num rows: 20 Data size: 2120 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: bigint), _col1 (type: string) - Statistics: Num rows: 20 Data size: 2120 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int), cstring1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 20 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd diff --git ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out index 0fc7021..ffce9e6 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out @@ -244,82 +244,19 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez #### A masked pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 30 Data size: 7167 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), 
cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 30 Data size: 7167 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 - input vertices: - 1 Map 2 - Statistics: Num rows: 81 Data size: 49734 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 81 Data size: 49734 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 30 Data size: 7167 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 30 Data size: 7167 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 30 Data size: 7167 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) - Execution mode: vectorized, llap - LLAP IO: all inputs - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -388,81 +325,19 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select c.ctinyint from 
small_alltypesorc_b c left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - outputColumnNames: _col0 - input vertices: - 1 Map 2 - Statistics: Num rows: 450 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 450 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd @@ -887,7 +762,7 @@ NULL NULL NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -896,7 +771,7 @@ left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -905,112 +780,7 @@ left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 30 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint), cint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 
30 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 81 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - input vertices: - 1 Map 4 - Statistics: Num rows: 1215 Data size: 9720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd diff --git ql/src/test/results/clientpositive/llap/vector_outer_join5.q.out ql/src/test/results/clientpositive/llap/vector_outer_join5.q.out index 6866862..4f25253 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join5.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join5.q.out @@ -62,100 +62,21 @@ POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table #### A masked pattern was here #### -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* from sorted_mod_4 s left outer 
join small_table st on s.ctinyint = st.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st on s.ctinyint = st.ctinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez #### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - input vertices: - 1 Map 3 - Statistics: Num rows: 6444 Data size: 51552 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: st - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st @@ -175,103 +96,21 @@ POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 #### A masked pattern was here #### 6876 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.cmodint = 2 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and 
s.cmodint = 2 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col1 = 2)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - input vertices: - 1 Map 3 - Statistics: Num rows: 6444 Data size: 51552 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -291,103 +130,21 @@ POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 #### A masked pattern was here #### 6058 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - 
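
NOTE: the text-format plan removed below is the one this query's new formatted output replaces. Its map join carries the residual non-equi join condition as a "filter predicates" entry on the big-table side ({((UDFToInteger(_col0) pmod 4) = _col1)} in the deleted lines that follow). A minimal sketch for regenerating this expected output, reusing only the query text and table names that appear in this test; the explicit set command is an assumption here, since the .q file may enable vectorization elsewhere:

    set hive.vectorized.execution.enabled=true;  -- assumed session setting; the property name appears later in this patch
    explain vectorization detail formatted
    select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
    from sorted_mod_4 s
    left outer join small_table sm
    on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint) t1;
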
-STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {((UDFToInteger(_col0) pmod 4) = _col1)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - input vertices: - 1 Map 3 - Statistics: Num rows: 6444 Data size: 51552 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -407,103 +164,21 @@ POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 #### A masked pattern was here #### 6248 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.ctinyint < 100 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.ctinyint < 100 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez #### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 
1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col0 < 100)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - input vertices: - 1 Map 3 - Statistics: Num rows: 6444 Data size: 51552 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -523,7 +198,7 @@ POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 #### A masked pattern was here #### 6876 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm @@ -532,7 +207,7 @@ left outer join sorted_mod_4 s2 on s2.ctinyint = s.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm @@ -541,119 +216,7 @@ left outer join sorted_mod_4 s2 on s2.ctinyint = s.ctinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - 
TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 151450 Data size: 605800 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 151450 Data size: 605800 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: (_col0 pmod 8) (type: bigint) - sort order: + - Map-reduce partition columns: (_col0 pmod 8) (type: bigint) - Statistics: Num rows: 100 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan - alias: s2 - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Statistics: Num rows: 9760469 Data size: 78083752 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm @@ -739,100 +302,21 @@ POSTHOOK: query: ANALYZE TABLE 
small_table2 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st on s.cmodtinyint = st.cmodtinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st on s.cmodtinyint = st.cmodtinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez #### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 40386 Data size: 323088 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: st - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st @@ -852,103 +336,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 39112 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on 
s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col1 = 2)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 40386 Data size: 323088 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -968,103 +370,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 11171 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization 
detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {((_col0 pmod 4) = _col1)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 40386 Data size: 323088 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -1084,103 +404,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 14371 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join 
small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez #### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col0 < 3)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 40386 Data size: 323088 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -1200,7 +438,7 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 17792 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm @@ -1209,7 +447,7 @@ left outer join mod_8_mod_4 s2 on s2.cmodtinyint = s.cmodtinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm @@ -1218,119 +456,7 @@ left outer join mod_8_mod_4 s2 on s2.cmodtinyint = s.cmodtinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends 
on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 151450 Data size: 605800 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 151450 Data size: 605800 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: (_col0 pmod 8) (type: bigint) - sort order: + - Map-reduce partition columns: (_col0 pmod 8) (type: bigint) - Statistics: Num rows: 100 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan - alias: s2 - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 152914016 Data size: 1223312128 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm diff --git ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out index 736b8f9..51068fe 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out @@ -122,113 +122,15 @@ POSTHOOK: Output: default@TJOIN4 POSTHOOK: Lineage: tjoin4.c1 SIMPLE [(tjoin4_txt)tjoin4_txt.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin4.c2 SIMPLE [(tjoin4_txt)tjoin4_txt.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin4.rnum SIMPLE [(tjoin4_txt)tjoin4_txt.FieldSchema(name:rnum, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez #### A masked pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 2 - Map 
Operator Tree: - TableScan - alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: tjoin3 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: all inputs - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY @@ -247,108 +149,15 @@ POSTHOOK: Input: default@tjoin3 0 3 0 1 NULL NULL 2 NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select tj1rnum, tj2rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select tj1rnum, tj2rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - 
Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: tjoin3 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select tj1rnum, tj2rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out index 8b054b8..899687f 100644 --- ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out +++ ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out @@ -72,12 +72,16 @@ POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_date_sk SIMPLE [(invent POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -95,31 +99,73 @@ STAGE PLANS: TableScan alias: inventory_part_0 Statistics: Num rows: 200 Data size: 4776 
Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -196,12 +242,16 @@ POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_date_sk SIMPLE [(in POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_1 POSTHOOK: type: QUERY +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -219,31 +269,73 @@ STAGE PLANS: TableScan alias: inventory_part_1 Statistics: Num rows: 200 Data size: 13476 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 200 Data size: 13476 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -320,12 +412,16 @@ POSTHOOK: type: ALTERTABLE_RENAMECOL POSTHOOK: Input: default@inventory_part_2a POSTHOOK: Input: default@inventory_part_2a@par=2 POSTHOOK: Output: default@inventory_part_2a@par=2 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_2a PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_2a POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: 
Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -343,31 +439,73 @@ STAGE PLANS: TableScan alias: inventory_part_2a Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -431,12 +569,16 @@ POSTHOOK: type: ALTERTABLE_RENAMECOL POSTHOOK: Input: default@inventory_part_2b POSTHOOK: Input: default@inventory_part_2b@par1=2/par2=3 POSTHOOK: Output: default@inventory_part_2b@par1=2/par2=3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_2b PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_2b POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -454,31 +596,73 @@ STAGE PLANS: TableScan alias: 
inventory_part_2b Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -542,12 +726,16 @@ POSTHOOK: type: ALTERTABLE_RENAMECOL POSTHOOK: Input: default@inventory_part_3 POSTHOOK: Input: default@inventory_part_3@par=2 POSTHOOK: Output: default@inventory_part_3@par=2 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_3 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_3 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -565,31 +753,73 @@ STAGE PLANS: TableScan alias: inventory_part_3 Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + 
native: true + projectedOutputColumns: [0, 1, 2, 3, 4] Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out index 2703aff..4cc6b81 100644 --- ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out +++ ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out @@ -244,12 +244,16 @@ POSTHOOK: Input: default@flights_tiny_orc 2010-10-29 12 2010-10-30 11 2010-10-31 8 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from flights_tiny_orc sort by fl_num, fl_date limit 25 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from flights_tiny_orc sort by fl_num, fl_date limit 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -268,46 +272,102 @@ STAGE PLANS: 
TableScan alias: flights_tiny_orc Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: origin_city_name (type: string), dest_city_name (type: string), fl_date (type: date), fl_time (type: timestamp), arr_delay (type: float), fl_num (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: int), _col2 (type: date) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: timestamp), _col4 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: date), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 1, 4, 5, 0] Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: int), _col2 (type: date) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: timestamp), _col4 (type: float) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: date), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 1, 4, 5, 0] Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -353,12 +413,16 @@ Chicago New York 2010-10-24 2010-10-24 07:00:00 113.0 897 Chicago New York 2010-10-25 2010-10-25 07:00:00 -1.0 897 Chicago New York 2010-10-26 2010-10-26 07:00:00 0.0 897 Chicago New York 2010-10-27 2010-10-27 07:00:00 -11.0 897 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select fl_date, count(*) from flights_tiny_orc group by fl_date PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select fl_date, count(*) from flights_tiny_orc group by fl_date POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -376,12 +440,26 @@ STAGE PLANS: TableScan alias: flights_tiny_orc Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: fl_date (type: date) outputColumnNames: fl_date + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2 + native: false + projectedOutputColumns: [0] keys: fl_date (type: date) mode: hash outputColumnNames: _col0, _col1 @@ -390,21 +468,50 @@ STAGE PLANS: key expressions: _col0 (type: date) sort order: + Map-reduce partition columns: _col0 (type: date) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: date) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 68 Data size: 19584 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 68 Data size: 19584 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -578,17 +685,19 @@ POSTHOOK: Input: default@flights_tiny_orc_partitioned_date@fl_date=2010-10-29 POSTHOOK: Input: default@flights_tiny_orc_partitioned_date@fl_date=2010-10-30 POSTHOOK: Input: default@flights_tiny_orc_partitioned_date@fl_date=2010-10-31 #### A masked pattern was here #### -Baltimore New York 2010-10-20 07:00:00 -30.0 1064 2010-10-20 -Baltimore New York 2010-10-20 07:00:00 23.0 1142 2010-10-20 -Baltimore New York 2010-10-20 07:00:00 6.0 1599 2010-10-20 -Chicago New York 2010-10-20 07:00:00 42.0 361 2010-10-20 -Chicago New York 2010-10-20 07:00:00 24.0 897 2010-10-20 -Chicago New York 2010-10-20 07:00:00 15.0 1531 2010-10-20 -Chicago New York 2010-10-20 07:00:00 -6.0 1610 2010-10-20 -Chicago New York 2010-10-20 07:00:00 -2.0 3198 2010-10-20 -Cleveland New York 2010-10-20 07:00:00 -8.0 2630 2010-10-20 -Cleveland New York 2010-10-20 07:00:00 -15.0 3014 2010-10-20 -Washington New York 2010-10-20 07:00:00 -2.0 7291 2010-10-20 +Baltimore New York 2010-10-26 07:00:00 -22.0 1064 2010-10-26 +Baltimore New York 2010-10-26 07:00:00 123.0 1142 2010-10-26 +Baltimore New York 2010-10-26 07:00:00 90.0 1599 2010-10-26 +Chicago New York 2010-10-26 07:00:00 12.0 361 2010-10-26 +Chicago New York 2010-10-26 07:00:00 0.0 897 2010-10-26 +Chicago New York 2010-10-26 07:00:00 29.0 1531 2010-10-26 +Chicago New York 2010-10-26 07:00:00 -17.0 1610 2010-10-26 +Chicago New York 2010-10-26 07:00:00 6.0 3198 2010-10-26 +Cleveland New York 2010-10-26 07:00:00 4.0 2630 2010-10-26 +Cleveland New York 2010-10-26 07:00:00 -27.0 2646 2010-10-26 +Cleveland New York 2010-10-26 07:00:00 -11.0 2662 2010-10-26 +Cleveland New York 2010-10-26 07:00:00 13.0 3014 2010-10-26 +Washington New York 2010-10-26 07:00:00 4.0 7291 2010-10-26 Baltimore New York 2010-10-21 07:00:00 17.0 1064 2010-10-21 Baltimore New York 2010-10-21 07:00:00 105.0 1142 2010-10-21 Baltimore New York 2010-10-21 07:00:00 28.0 1599 2010-10-21 @@ -601,41 +710,6 @@ Cleveland New York 2010-10-21 07:00:00 3.0 2630 2010-10-21 Cleveland New York 2010-10-21 07:00:00 29.0 2646 2010-10-21 Cleveland New York 2010-10-21 07:00:00 72.0 3014 2010-10-21 Washington New York 2010-10-21 07:00:00 22.0 7291 2010-10-21 -Baltimore New York 2010-10-22 07:00:00 -12.0 1064 2010-10-22 -Baltimore New York 2010-10-22 07:00:00 54.0 1142 2010-10-22 -Baltimore New York 
2010-10-22 07:00:00 18.0 1599 2010-10-22 -Chicago New York 2010-10-22 07:00:00 2.0 361 2010-10-22 -Chicago New York 2010-10-22 07:00:00 24.0 897 2010-10-22 -Chicago New York 2010-10-22 07:00:00 16.0 1531 2010-10-22 -Chicago New York 2010-10-22 07:00:00 -6.0 1610 2010-10-22 -Chicago New York 2010-10-22 07:00:00 -11.0 3198 2010-10-22 -Cleveland New York 2010-10-22 07:00:00 1.0 2630 2010-10-22 -Cleveland New York 2010-10-22 07:00:00 -25.0 2646 2010-10-22 -Cleveland New York 2010-10-22 07:00:00 -3.0 3014 2010-10-22 -Baltimore New York 2010-10-23 07:00:00 18.0 272 2010-10-23 -Baltimore New York 2010-10-23 07:00:00 -10.0 1805 2010-10-23 -Baltimore New York 2010-10-23 07:00:00 6.0 3171 2010-10-23 -Chicago New York 2010-10-23 07:00:00 3.0 384 2010-10-23 -Chicago New York 2010-10-23 07:00:00 32.0 426 2010-10-23 -Chicago New York 2010-10-23 07:00:00 1.0 650 2010-10-23 -Chicago New York 2010-10-23 07:00:00 11.0 3085 2010-10-23 -Cleveland New York 2010-10-23 07:00:00 -21.0 2932 2010-10-23 -Washington New York 2010-10-23 07:00:00 -25.0 5832 2010-10-23 -Washington New York 2010-10-23 07:00:00 -21.0 5904 2010-10-23 -Washington New York 2010-10-23 07:00:00 -18.0 5917 2010-10-23 -Washington New York 2010-10-23 07:00:00 -16.0 7274 2010-10-23 -Baltimore New York 2010-10-24 07:00:00 12.0 1599 2010-10-24 -Baltimore New York 2010-10-24 07:00:00 20.0 2571 2010-10-24 -Chicago New York 2010-10-24 07:00:00 10.0 361 2010-10-24 -Chicago New York 2010-10-24 07:00:00 113.0 897 2010-10-24 -Chicago New York 2010-10-24 07:00:00 -5.0 1531 2010-10-24 -Chicago New York 2010-10-24 07:00:00 -17.0 1610 2010-10-24 -Chicago New York 2010-10-24 07:00:00 -3.0 3198 2010-10-24 -Cleveland New York 2010-10-24 07:00:00 5.0 2254 2010-10-24 -Cleveland New York 2010-10-24 07:00:00 -11.0 2630 2010-10-24 -Cleveland New York 2010-10-24 07:00:00 -20.0 2646 2010-10-24 -Cleveland New York 2010-10-24 07:00:00 -9.0 3014 2010-10-24 -Washington New York 2010-10-24 07:00:00 -26.0 7282 2010-10-24 Baltimore New York 2010-10-25 07:00:00 -25.0 1064 2010-10-25 Baltimore New York 2010-10-25 07:00:00 92.0 1142 2010-10-25 Baltimore New York 2010-10-25 07:00:00 106.0 1599 2010-10-25 @@ -648,19 +722,42 @@ Cleveland New York 2010-10-25 07:00:00 -4.0 2630 2010-10-25 Cleveland New York 2010-10-25 07:00:00 81.0 2646 2010-10-25 Cleveland New York 2010-10-25 07:00:00 42.0 3014 2010-10-25 Washington New York 2010-10-25 07:00:00 9.0 7291 2010-10-25 -Baltimore New York 2010-10-26 07:00:00 -22.0 1064 2010-10-26 -Baltimore New York 2010-10-26 07:00:00 123.0 1142 2010-10-26 -Baltimore New York 2010-10-26 07:00:00 90.0 1599 2010-10-26 -Chicago New York 2010-10-26 07:00:00 12.0 361 2010-10-26 -Chicago New York 2010-10-26 07:00:00 0.0 897 2010-10-26 -Chicago New York 2010-10-26 07:00:00 29.0 1531 2010-10-26 -Chicago New York 2010-10-26 07:00:00 -17.0 1610 2010-10-26 -Chicago New York 2010-10-26 07:00:00 6.0 3198 2010-10-26 -Cleveland New York 2010-10-26 07:00:00 4.0 2630 2010-10-26 -Cleveland New York 2010-10-26 07:00:00 -27.0 2646 2010-10-26 -Cleveland New York 2010-10-26 07:00:00 -11.0 2662 2010-10-26 -Cleveland New York 2010-10-26 07:00:00 13.0 3014 2010-10-26 -Washington New York 2010-10-26 07:00:00 4.0 7291 2010-10-26 +Baltimore New York 2010-10-24 07:00:00 12.0 1599 2010-10-24 +Baltimore New York 2010-10-24 07:00:00 20.0 2571 2010-10-24 +Chicago New York 2010-10-24 07:00:00 10.0 361 2010-10-24 +Chicago New York 2010-10-24 07:00:00 113.0 897 2010-10-24 +Chicago New York 2010-10-24 07:00:00 -5.0 1531 2010-10-24 +Chicago New York 2010-10-24 07:00:00 -17.0 1610 2010-10-24 
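The result-row hunks above and below remove and re-add identical rows: the patch appears only to change the order in which the date partitions of flights_tiny_orc_partitioned_date are enumerated, so the golden output is permuted rather than changed. A minimal HiveQL sketch of an order-insensitive variant of the same query (same table, same sort keys the q-file already uses; the explicit ORDER BY is the only addition) would be:

    select * from flights_tiny_orc_partitioned_date
    order by fl_num, fl_date;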
+Chicago New York 2010-10-24 07:00:00 -3.0 3198 2010-10-24 +Cleveland New York 2010-10-24 07:00:00 5.0 2254 2010-10-24 +Cleveland New York 2010-10-24 07:00:00 -11.0 2630 2010-10-24 +Cleveland New York 2010-10-24 07:00:00 -20.0 2646 2010-10-24 +Cleveland New York 2010-10-24 07:00:00 -9.0 3014 2010-10-24 +Washington New York 2010-10-24 07:00:00 -26.0 7282 2010-10-24 +Baltimore New York 2010-10-23 07:00:00 18.0 272 2010-10-23 +Baltimore New York 2010-10-23 07:00:00 -10.0 1805 2010-10-23 +Baltimore New York 2010-10-23 07:00:00 6.0 3171 2010-10-23 +Chicago New York 2010-10-23 07:00:00 3.0 384 2010-10-23 +Chicago New York 2010-10-23 07:00:00 32.0 426 2010-10-23 +Chicago New York 2010-10-23 07:00:00 1.0 650 2010-10-23 +Chicago New York 2010-10-23 07:00:00 11.0 3085 2010-10-23 +Cleveland New York 2010-10-23 07:00:00 -21.0 2932 2010-10-23 +Washington New York 2010-10-23 07:00:00 -25.0 5832 2010-10-23 +Washington New York 2010-10-23 07:00:00 -21.0 5904 2010-10-23 +Washington New York 2010-10-23 07:00:00 -18.0 5917 2010-10-23 +Washington New York 2010-10-23 07:00:00 -16.0 7274 2010-10-23 +Baltimore New York 2010-10-29 07:00:00 -24.0 1064 2010-10-29 +Baltimore New York 2010-10-29 07:00:00 21.0 1142 2010-10-29 +Baltimore New York 2010-10-29 07:00:00 -2.0 1599 2010-10-29 +Chicago New York 2010-10-29 07:00:00 -12.0 361 2010-10-29 +Chicago New York 2010-10-29 07:00:00 -11.0 897 2010-10-29 +Chicago New York 2010-10-29 07:00:00 15.0 1531 2010-10-29 +Chicago New York 2010-10-29 07:00:00 -18.0 1610 2010-10-29 +Chicago New York 2010-10-29 07:00:00 -4.0 3198 2010-10-29 +Cleveland New York 2010-10-29 07:00:00 -4.0 2630 2010-10-29 +Cleveland New York 2010-10-29 07:00:00 -19.0 2646 2010-10-29 +Cleveland New York 2010-10-29 07:00:00 -12.0 3014 2010-10-29 +Washington New York 2010-10-29 07:00:00 1.0 7291 2010-10-29 Baltimore New York 2010-10-27 07:00:00 -18.0 1064 2010-10-27 Baltimore New York 2010-10-27 07:00:00 49.0 1142 2010-10-27 Baltimore New York 2010-10-27 07:00:00 92.0 1599 2010-10-27 @@ -684,18 +781,6 @@ Cleveland New York 2010-10-28 07:00:00 3.0 2630 2010-10-28 Cleveland New York 2010-10-28 07:00:00 -6.0 2646 2010-10-28 Cleveland New York 2010-10-28 07:00:00 1.0 3014 2010-10-28 Washington New York 2010-10-28 07:00:00 45.0 7291 2010-10-28 -Baltimore New York 2010-10-29 07:00:00 -24.0 1064 2010-10-29 -Baltimore New York 2010-10-29 07:00:00 21.0 1142 2010-10-29 -Baltimore New York 2010-10-29 07:00:00 -2.0 1599 2010-10-29 -Chicago New York 2010-10-29 07:00:00 -12.0 361 2010-10-29 -Chicago New York 2010-10-29 07:00:00 -11.0 897 2010-10-29 -Chicago New York 2010-10-29 07:00:00 15.0 1531 2010-10-29 -Chicago New York 2010-10-29 07:00:00 -18.0 1610 2010-10-29 -Chicago New York 2010-10-29 07:00:00 -4.0 3198 2010-10-29 -Cleveland New York 2010-10-29 07:00:00 -4.0 2630 2010-10-29 -Cleveland New York 2010-10-29 07:00:00 -19.0 2646 2010-10-29 -Cleveland New York 2010-10-29 07:00:00 -12.0 3014 2010-10-29 -Washington New York 2010-10-29 07:00:00 1.0 7291 2010-10-29 Baltimore New York 2010-10-30 07:00:00 14.0 272 2010-10-30 Baltimore New York 2010-10-30 07:00:00 -1.0 1805 2010-10-30 Baltimore New York 2010-10-30 07:00:00 5.0 3171 2010-10-30 @@ -707,6 +792,28 @@ Cleveland New York 2010-10-30 07:00:00 -23.0 2018 2010-10-30 Cleveland New York 2010-10-30 07:00:00 -12.0 2932 2010-10-30 Washington New York 2010-10-30 07:00:00 -27.0 5904 2010-10-30 Washington New York 2010-10-30 07:00:00 -16.0 5917 2010-10-30 +Baltimore New York 2010-10-20 07:00:00 -30.0 1064 2010-10-20 +Baltimore New York 2010-10-20 07:00:00 23.0 1142 2010-10-20 
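A quick way to confirm that these hunks only permute rows is that the per-date counts reported elsewhere in this file (2010-10-29 12, 2010-10-30 11, 2010-10-31 8, and so on) are untouched by the patch. A hedged sketch of that check, reusing the group-by query this q.out already runs and adding ordering only so the check itself is deterministic:

    select fl_date, count(*)
    from flights_tiny_orc_partitioned_date
    group by fl_date
    order by fl_date;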
+Baltimore New York 2010-10-20 07:00:00 6.0 1599 2010-10-20 +Chicago New York 2010-10-20 07:00:00 42.0 361 2010-10-20 +Chicago New York 2010-10-20 07:00:00 24.0 897 2010-10-20 +Chicago New York 2010-10-20 07:00:00 15.0 1531 2010-10-20 +Chicago New York 2010-10-20 07:00:00 -6.0 1610 2010-10-20 +Chicago New York 2010-10-20 07:00:00 -2.0 3198 2010-10-20 +Cleveland New York 2010-10-20 07:00:00 -8.0 2630 2010-10-20 +Cleveland New York 2010-10-20 07:00:00 -15.0 3014 2010-10-20 +Washington New York 2010-10-20 07:00:00 -2.0 7291 2010-10-20 +Baltimore New York 2010-10-22 07:00:00 -12.0 1064 2010-10-22 +Baltimore New York 2010-10-22 07:00:00 54.0 1142 2010-10-22 +Baltimore New York 2010-10-22 07:00:00 18.0 1599 2010-10-22 +Chicago New York 2010-10-22 07:00:00 2.0 361 2010-10-22 +Chicago New York 2010-10-22 07:00:00 24.0 897 2010-10-22 +Chicago New York 2010-10-22 07:00:00 16.0 1531 2010-10-22 +Chicago New York 2010-10-22 07:00:00 -6.0 1610 2010-10-22 +Chicago New York 2010-10-22 07:00:00 -11.0 3198 2010-10-22 +Cleveland New York 2010-10-22 07:00:00 1.0 2630 2010-10-22 +Cleveland New York 2010-10-22 07:00:00 -25.0 2646 2010-10-22 +Cleveland New York 2010-10-22 07:00:00 -3.0 3014 2010-10-22 Baltimore New York 2010-10-31 07:00:00 -1.0 1599 2010-10-31 Baltimore New York 2010-10-31 07:00:00 -14.0 2571 2010-10-31 Chicago New York 2010-10-31 07:00:00 -25.0 361 2010-10-31 @@ -816,26 +923,67 @@ POSTHOOK: Input: default@flights_tiny_orc_partitioned_date@fl_date=2010-10-31 2010-10-29 12 2010-10-30 11 2010-10-31 8 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_date PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_date POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: flights_tiny_orc_partitioned_date + Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: origin_city_name (type: string), dest_city_name (type: string), fl_time (type: timestamp), arr_delay (type: float), fl_num (type: int), fl_date (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Statistics: Num rows: 137 Data size: 7672 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 137 Data size: 7672 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: flights_tiny_orc_partitioned_date - Select Operator - expressions: origin_city_name (type: string), dest_city_name (type: string), fl_time (type: timestamp), arr_delay (type: float), fl_num (type: int), fl_date (type: date) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - ListSink + ListSink PREHOOK: query: select * from flights_tiny_orc_partitioned_date PREHOOK: type: QUERY @@ -869,17 +1017,19 @@ POSTHOOK: Input: default@flights_tiny_orc_partitioned_date@fl_date=2010-10-29 POSTHOOK: Input: default@flights_tiny_orc_partitioned_date@fl_date=2010-10-30 POSTHOOK: Input: default@flights_tiny_orc_partitioned_date@fl_date=2010-10-31 #### A masked pattern was here #### -Baltimore New York 2010-10-20 07:00:00 -30.0 1064 2010-10-20 -Baltimore New York 2010-10-20 07:00:00 23.0 1142 2010-10-20 -Baltimore New York 2010-10-20 07:00:00 6.0 1599 2010-10-20 -Chicago New York 2010-10-20 07:00:00 42.0 361 2010-10-20 -Chicago New York 2010-10-20 07:00:00 24.0 897 2010-10-20 -Chicago New York 2010-10-20 07:00:00 15.0 1531 2010-10-20 -Chicago New York 2010-10-20 07:00:00 -6.0 1610 2010-10-20 -Chicago New York 2010-10-20 07:00:00 -2.0 3198 2010-10-20 -Cleveland New York 2010-10-20 07:00:00 -8.0 2630 2010-10-20 -Cleveland New York 2010-10-20 07:00:00 -15.0 3014 2010-10-20 -Washington New York 2010-10-20 07:00:00 -2.0 7291 2010-10-20 +Baltimore New York 2010-10-26 07:00:00 -22.0 1064 2010-10-26 +Baltimore New York 2010-10-26 07:00:00 123.0 1142 2010-10-26 +Baltimore New York 2010-10-26 07:00:00 90.0 1599 2010-10-26 +Chicago New York 2010-10-26 07:00:00 12.0 361 2010-10-26 +Chicago New York 2010-10-26 07:00:00 0.0 897 2010-10-26 +Chicago New York 2010-10-26 07:00:00 29.0 1531 2010-10-26 +Chicago New York 2010-10-26 07:00:00 -17.0 1610 2010-10-26 +Chicago New York 2010-10-26 07:00:00 6.0 3198 2010-10-26 +Cleveland New York 2010-10-26 07:00:00 4.0 2630 2010-10-26 +Cleveland New York 2010-10-26 07:00:00 -27.0 2646 2010-10-26 +Cleveland New York 2010-10-26 07:00:00 -11.0 2662 2010-10-26 +Cleveland New York 2010-10-26 07:00:00 13.0 3014 2010-10-26 +Washington New York 2010-10-26 07:00:00 4.0 7291 2010-10-26 Baltimore New York 2010-10-21 07:00:00 17.0 1064 2010-10-21 Baltimore New York 2010-10-21 07:00:00 105.0 1142 2010-10-21 Baltimore New York 2010-10-21 07:00:00 28.0 1599 2010-10-21 @@ -892,41 +1042,6 @@ Cleveland New York 2010-10-21 07:00:00 3.0 2630 2010-10-21 Cleveland New York 2010-10-21 07:00:00 29.0 2646 2010-10-21 Cleveland New York 2010-10-21 07:00:00 72.0 3014 2010-10-21 Washington New York 2010-10-21 07:00:00 22.0 7291 2010-10-21 -Baltimore New York 2010-10-22 07:00:00 -12.0 1064 2010-10-22 -Baltimore New York 2010-10-22 07:00:00 54.0 1142 2010-10-22 -Baltimore New York 2010-10-22 07:00:00 18.0 1599 2010-10-22 -Chicago New York 2010-10-22 07:00:00 2.0 361 2010-10-22 -Chicago New York 2010-10-22 07:00:00 24.0 897 2010-10-22 -Chicago New York 2010-10-22 07:00:00 16.0 1531 2010-10-22 -Chicago New York 2010-10-22 07:00:00 -6.0 1610 2010-10-22 -Chicago New York 2010-10-22 07:00:00 -11.0 3198 2010-10-22 -Cleveland New York 2010-10-22 07:00:00 1.0 2630 2010-10-22 -Cleveland New York 2010-10-22 07:00:00 -25.0 2646 2010-10-22 -Cleveland New York 2010-10-22 07:00:00 -3.0 3014 2010-10-22 -Baltimore New York 2010-10-23 07:00:00 18.0 272 2010-10-23 -Baltimore New York 2010-10-23 07:00:00 -10.0 1805 2010-10-23 -Baltimore New York 2010-10-23 07:00:00 6.0 3171 2010-10-23 -Chicago New 
York 2010-10-23 07:00:00 3.0 384 2010-10-23 -Chicago New York 2010-10-23 07:00:00 32.0 426 2010-10-23 -Chicago New York 2010-10-23 07:00:00 1.0 650 2010-10-23 -Chicago New York 2010-10-23 07:00:00 11.0 3085 2010-10-23 -Cleveland New York 2010-10-23 07:00:00 -21.0 2932 2010-10-23 -Washington New York 2010-10-23 07:00:00 -25.0 5832 2010-10-23 -Washington New York 2010-10-23 07:00:00 -21.0 5904 2010-10-23 -Washington New York 2010-10-23 07:00:00 -18.0 5917 2010-10-23 -Washington New York 2010-10-23 07:00:00 -16.0 7274 2010-10-23 -Baltimore New York 2010-10-24 07:00:00 12.0 1599 2010-10-24 -Baltimore New York 2010-10-24 07:00:00 20.0 2571 2010-10-24 -Chicago New York 2010-10-24 07:00:00 10.0 361 2010-10-24 -Chicago New York 2010-10-24 07:00:00 113.0 897 2010-10-24 -Chicago New York 2010-10-24 07:00:00 -5.0 1531 2010-10-24 -Chicago New York 2010-10-24 07:00:00 -17.0 1610 2010-10-24 -Chicago New York 2010-10-24 07:00:00 -3.0 3198 2010-10-24 -Cleveland New York 2010-10-24 07:00:00 5.0 2254 2010-10-24 -Cleveland New York 2010-10-24 07:00:00 -11.0 2630 2010-10-24 -Cleveland New York 2010-10-24 07:00:00 -20.0 2646 2010-10-24 -Cleveland New York 2010-10-24 07:00:00 -9.0 3014 2010-10-24 -Washington New York 2010-10-24 07:00:00 -26.0 7282 2010-10-24 Baltimore New York 2010-10-25 07:00:00 -25.0 1064 2010-10-25 Baltimore New York 2010-10-25 07:00:00 92.0 1142 2010-10-25 Baltimore New York 2010-10-25 07:00:00 106.0 1599 2010-10-25 @@ -939,42 +1054,30 @@ Cleveland New York 2010-10-25 07:00:00 -4.0 2630 2010-10-25 Cleveland New York 2010-10-25 07:00:00 81.0 2646 2010-10-25 Cleveland New York 2010-10-25 07:00:00 42.0 3014 2010-10-25 Washington New York 2010-10-25 07:00:00 9.0 7291 2010-10-25 -Baltimore New York 2010-10-26 07:00:00 -22.0 1064 2010-10-26 -Baltimore New York 2010-10-26 07:00:00 123.0 1142 2010-10-26 -Baltimore New York 2010-10-26 07:00:00 90.0 1599 2010-10-26 -Chicago New York 2010-10-26 07:00:00 12.0 361 2010-10-26 -Chicago New York 2010-10-26 07:00:00 0.0 897 2010-10-26 -Chicago New York 2010-10-26 07:00:00 29.0 1531 2010-10-26 -Chicago New York 2010-10-26 07:00:00 -17.0 1610 2010-10-26 -Chicago New York 2010-10-26 07:00:00 6.0 3198 2010-10-26 -Cleveland New York 2010-10-26 07:00:00 4.0 2630 2010-10-26 -Cleveland New York 2010-10-26 07:00:00 -27.0 2646 2010-10-26 -Cleveland New York 2010-10-26 07:00:00 -11.0 2662 2010-10-26 -Cleveland New York 2010-10-26 07:00:00 13.0 3014 2010-10-26 -Washington New York 2010-10-26 07:00:00 4.0 7291 2010-10-26 -Baltimore New York 2010-10-27 07:00:00 -18.0 1064 2010-10-27 -Baltimore New York 2010-10-27 07:00:00 49.0 1142 2010-10-27 -Baltimore New York 2010-10-27 07:00:00 92.0 1599 2010-10-27 -Chicago New York 2010-10-27 07:00:00 148.0 361 2010-10-27 -Chicago New York 2010-10-27 07:00:00 -11.0 897 2010-10-27 -Chicago New York 2010-10-27 07:00:00 70.0 1531 2010-10-27 -Chicago New York 2010-10-27 07:00:00 8.0 1610 2010-10-27 -Chicago New York 2010-10-27 07:00:00 21.0 3198 2010-10-27 -Cleveland New York 2010-10-27 07:00:00 16.0 2630 2010-10-27 -Cleveland New York 2010-10-27 07:00:00 27.0 3014 2010-10-27 -Washington New York 2010-10-27 07:00:00 26.0 7291 2010-10-27 -Baltimore New York 2010-10-28 07:00:00 -4.0 1064 2010-10-28 -Baltimore New York 2010-10-28 07:00:00 -14.0 1142 2010-10-28 -Baltimore New York 2010-10-28 07:00:00 -14.0 1599 2010-10-28 -Chicago New York 2010-10-28 07:00:00 2.0 361 2010-10-28 -Chicago New York 2010-10-28 07:00:00 2.0 897 2010-10-28 -Chicago New York 2010-10-28 07:00:00 -11.0 1531 2010-10-28 -Chicago New York 2010-10-28 07:00:00 3.0 1610 
2010-10-28 -Chicago New York 2010-10-28 07:00:00 -18.0 3198 2010-10-28 -Cleveland New York 2010-10-28 07:00:00 3.0 2630 2010-10-28 -Cleveland New York 2010-10-28 07:00:00 -6.0 2646 2010-10-28 -Cleveland New York 2010-10-28 07:00:00 1.0 3014 2010-10-28 -Washington New York 2010-10-28 07:00:00 45.0 7291 2010-10-28 +Baltimore New York 2010-10-24 07:00:00 12.0 1599 2010-10-24 +Baltimore New York 2010-10-24 07:00:00 20.0 2571 2010-10-24 +Chicago New York 2010-10-24 07:00:00 10.0 361 2010-10-24 +Chicago New York 2010-10-24 07:00:00 113.0 897 2010-10-24 +Chicago New York 2010-10-24 07:00:00 -5.0 1531 2010-10-24 +Chicago New York 2010-10-24 07:00:00 -17.0 1610 2010-10-24 +Chicago New York 2010-10-24 07:00:00 -3.0 3198 2010-10-24 +Cleveland New York 2010-10-24 07:00:00 5.0 2254 2010-10-24 +Cleveland New York 2010-10-24 07:00:00 -11.0 2630 2010-10-24 +Cleveland New York 2010-10-24 07:00:00 -20.0 2646 2010-10-24 +Cleveland New York 2010-10-24 07:00:00 -9.0 3014 2010-10-24 +Washington New York 2010-10-24 07:00:00 -26.0 7282 2010-10-24 +Baltimore New York 2010-10-23 07:00:00 18.0 272 2010-10-23 +Baltimore New York 2010-10-23 07:00:00 -10.0 1805 2010-10-23 +Baltimore New York 2010-10-23 07:00:00 6.0 3171 2010-10-23 +Chicago New York 2010-10-23 07:00:00 3.0 384 2010-10-23 +Chicago New York 2010-10-23 07:00:00 32.0 426 2010-10-23 +Chicago New York 2010-10-23 07:00:00 1.0 650 2010-10-23 +Chicago New York 2010-10-23 07:00:00 11.0 3085 2010-10-23 +Cleveland New York 2010-10-23 07:00:00 -21.0 2932 2010-10-23 +Washington New York 2010-10-23 07:00:00 -25.0 5832 2010-10-23 +Washington New York 2010-10-23 07:00:00 -21.0 5904 2010-10-23 +Washington New York 2010-10-23 07:00:00 -18.0 5917 2010-10-23 +Washington New York 2010-10-23 07:00:00 -16.0 7274 2010-10-23 Baltimore New York 2010-10-29 07:00:00 -24.0 1064 2010-10-29 Baltimore New York 2010-10-29 07:00:00 21.0 1142 2010-10-29 Baltimore New York 2010-10-29 07:00:00 -2.0 1599 2010-10-29 @@ -987,6 +1090,29 @@ Cleveland New York 2010-10-29 07:00:00 -4.0 2630 2010-10-29 Cleveland New York 2010-10-29 07:00:00 -19.0 2646 2010-10-29 Cleveland New York 2010-10-29 07:00:00 -12.0 3014 2010-10-29 Washington New York 2010-10-29 07:00:00 1.0 7291 2010-10-29 +Baltimore New York 2010-10-27 07:00:00 -18.0 1064 2010-10-27 +Baltimore New York 2010-10-27 07:00:00 49.0 1142 2010-10-27 +Baltimore New York 2010-10-27 07:00:00 92.0 1599 2010-10-27 +Chicago New York 2010-10-27 07:00:00 148.0 361 2010-10-27 +Chicago New York 2010-10-27 07:00:00 -11.0 897 2010-10-27 +Chicago New York 2010-10-27 07:00:00 70.0 1531 2010-10-27 +Chicago New York 2010-10-27 07:00:00 8.0 1610 2010-10-27 +Chicago New York 2010-10-27 07:00:00 21.0 3198 2010-10-27 +Cleveland New York 2010-10-27 07:00:00 16.0 2630 2010-10-27 +Cleveland New York 2010-10-27 07:00:00 27.0 3014 2010-10-27 +Washington New York 2010-10-27 07:00:00 26.0 7291 2010-10-27 +Baltimore New York 2010-10-28 07:00:00 -4.0 1064 2010-10-28 +Baltimore New York 2010-10-28 07:00:00 -14.0 1142 2010-10-28 +Baltimore New York 2010-10-28 07:00:00 -14.0 1599 2010-10-28 +Chicago New York 2010-10-28 07:00:00 2.0 361 2010-10-28 +Chicago New York 2010-10-28 07:00:00 2.0 897 2010-10-28 +Chicago New York 2010-10-28 07:00:00 -11.0 1531 2010-10-28 +Chicago New York 2010-10-28 07:00:00 3.0 1610 2010-10-28 +Chicago New York 2010-10-28 07:00:00 -18.0 3198 2010-10-28 +Cleveland New York 2010-10-28 07:00:00 3.0 2630 2010-10-28 +Cleveland New York 2010-10-28 07:00:00 -6.0 2646 2010-10-28 +Cleveland New York 2010-10-28 07:00:00 1.0 3014 2010-10-28 +Washington New York 
2010-10-28 07:00:00 45.0 7291 2010-10-28 Baltimore New York 2010-10-30 07:00:00 14.0 272 2010-10-30 Baltimore New York 2010-10-30 07:00:00 -1.0 1805 2010-10-30 Baltimore New York 2010-10-30 07:00:00 5.0 3171 2010-10-30 @@ -998,6 +1124,28 @@ Cleveland New York 2010-10-30 07:00:00 -23.0 2018 2010-10-30 Cleveland New York 2010-10-30 07:00:00 -12.0 2932 2010-10-30 Washington New York 2010-10-30 07:00:00 -27.0 5904 2010-10-30 Washington New York 2010-10-30 07:00:00 -16.0 5917 2010-10-30 +Baltimore New York 2010-10-20 07:00:00 -30.0 1064 2010-10-20 +Baltimore New York 2010-10-20 07:00:00 23.0 1142 2010-10-20 +Baltimore New York 2010-10-20 07:00:00 6.0 1599 2010-10-20 +Chicago New York 2010-10-20 07:00:00 42.0 361 2010-10-20 +Chicago New York 2010-10-20 07:00:00 24.0 897 2010-10-20 +Chicago New York 2010-10-20 07:00:00 15.0 1531 2010-10-20 +Chicago New York 2010-10-20 07:00:00 -6.0 1610 2010-10-20 +Chicago New York 2010-10-20 07:00:00 -2.0 3198 2010-10-20 +Cleveland New York 2010-10-20 07:00:00 -8.0 2630 2010-10-20 +Cleveland New York 2010-10-20 07:00:00 -15.0 3014 2010-10-20 +Washington New York 2010-10-20 07:00:00 -2.0 7291 2010-10-20 +Baltimore New York 2010-10-22 07:00:00 -12.0 1064 2010-10-22 +Baltimore New York 2010-10-22 07:00:00 54.0 1142 2010-10-22 +Baltimore New York 2010-10-22 07:00:00 18.0 1599 2010-10-22 +Chicago New York 2010-10-22 07:00:00 2.0 361 2010-10-22 +Chicago New York 2010-10-22 07:00:00 24.0 897 2010-10-22 +Chicago New York 2010-10-22 07:00:00 16.0 1531 2010-10-22 +Chicago New York 2010-10-22 07:00:00 -6.0 1610 2010-10-22 +Chicago New York 2010-10-22 07:00:00 -11.0 3198 2010-10-22 +Cleveland New York 2010-10-22 07:00:00 1.0 2630 2010-10-22 +Cleveland New York 2010-10-22 07:00:00 -25.0 2646 2010-10-22 +Cleveland New York 2010-10-22 07:00:00 -3.0 3014 2010-10-22 Baltimore New York 2010-10-31 07:00:00 -1.0 1599 2010-10-31 Baltimore New York 2010-10-31 07:00:00 -14.0 2571 2010-10-31 Chicago New York 2010-10-31 07:00:00 -25.0 361 2010-10-31 @@ -1006,12 +1154,16 @@ Chicago New York 2010-10-31 07:00:00 -4.0 1531 2010-10-31 Chicago New York 2010-10-31 07:00:00 -22.0 1610 2010-10-31 Chicago New York 2010-10-31 07:00:00 -15.0 3198 2010-10-31 Washington New York 2010-10-31 07:00:00 -18.0 7282 2010-10-31 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1030,46 +1182,102 @@ STAGE PLANS: TableScan alias: flights_tiny_orc_partitioned_date Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: origin_city_name (type: string), dest_city_name (type: string), fl_time (type: timestamp), arr_delay (type: float), fl_num (type: int), fl_date (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col4 
(type: int), _col5 (type: date) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 0, 1] Statistics: Num rows: 137 Data size: 7672 Basic stats: COMPLETE Column stats: PARTIAL Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: date) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 0, 1] Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: 
PARTIAL Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1139,12 +1347,16 @@ Chicago New York 2010-10-24 07:00:00 113.0 897 2010-10-24 Chicago New York 2010-10-25 07:00:00 -1.0 897 2010-10-25 Chicago New York 2010-10-26 07:00:00 0.0 897 2010-10-26 Chicago New York 2010-10-27 07:00:00 -11.0 897 2010-10-27 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1162,12 +1374,26 @@ STAGE PLANS: TableScan alias: flights_tiny_orc_partitioned_date Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: fl_date (type: date) outputColumnNames: fl_date + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5] Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 5 + native: false + projectedOutputColumns: [0] keys: fl_date (type: date) mode: hash outputColumnNames: _col0, _col1 @@ -1176,21 +1402,50 @@ STAGE PLANS: key expressions: _col0 (type: date) sort order: + Map-reduce partition columns: _col0 (type: date) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> 
bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: date) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1388,17 +1643,19 @@ POSTHOOK: Input: default@flights_tiny_orc_partitioned_timestamp@fl_time=2010-10- POSTHOOK: Input: default@flights_tiny_orc_partitioned_timestamp@fl_time=2010-10-30 07%3A00%3A00 POSTHOOK: Input: default@flights_tiny_orc_partitioned_timestamp@fl_time=2010-10-31 07%3A00%3A00 #### A masked pattern was here #### -Baltimore New York 2010-10-20 -30.0 1064 2010-10-20 07:00:00 -Baltimore New York 2010-10-20 23.0 1142 2010-10-20 07:00:00 -Baltimore New York 2010-10-20 6.0 1599 2010-10-20 07:00:00 -Chicago New York 2010-10-20 42.0 361 2010-10-20 07:00:00 -Chicago New York 2010-10-20 24.0 897 2010-10-20 07:00:00 -Chicago New York 2010-10-20 15.0 1531 2010-10-20 07:00:00 -Chicago New York 2010-10-20 -6.0 1610 2010-10-20 07:00:00 -Chicago New York 2010-10-20 -2.0 3198 2010-10-20 07:00:00 -Cleveland New York 2010-10-20 -8.0 2630 2010-10-20 07:00:00 -Cleveland New York 2010-10-20 -15.0 3014 2010-10-20 07:00:00 -Washington New York 2010-10-20 -2.0 7291 2010-10-20 07:00:00 +Baltimore New York 2010-10-26 -22.0 1064 2010-10-26 07:00:00 +Baltimore New York 2010-10-26 123.0 1142 2010-10-26 07:00:00 +Baltimore New York 2010-10-26 90.0 1599 2010-10-26 07:00:00 +Chicago New York 2010-10-26 12.0 361 2010-10-26 07:00:00 +Chicago New York 2010-10-26 0.0 897 2010-10-26 07:00:00 +Chicago New York 2010-10-26 29.0 1531 2010-10-26 07:00:00 +Chicago New York 2010-10-26 -17.0 1610 2010-10-26 07:00:00 +Chicago New York 2010-10-26 6.0 3198 2010-10-26 07:00:00 +Cleveland New York 2010-10-26 4.0 2630 2010-10-26 07:00:00 +Cleveland New York 2010-10-26 -27.0 2646 2010-10-26 07:00:00 +Cleveland New York 2010-10-26 -11.0 2662 2010-10-26 07:00:00 +Cleveland New York 2010-10-26 13.0 3014 2010-10-26 07:00:00 +Washington New York 2010-10-26 4.0 7291 2010-10-26 07:00:00 Baltimore New York 2010-10-21 17.0 1064 2010-10-21 07:00:00 Baltimore New York 2010-10-21 105.0 1142 2010-10-21 07:00:00 Baltimore New York 2010-10-21 28.0 1599 2010-10-21 07:00:00 @@ -1411,41 +1668,6 @@ Cleveland New York 2010-10-21 3.0 2630 2010-10-21 07:00:00 Cleveland New York 2010-10-21 29.0 2646 2010-10-21 07:00:00 Cleveland New York 2010-10-21 72.0 3014 2010-10-21 07:00:00 Washington New York 2010-10-21 22.0 7291 2010-10-21 07:00:00 -Baltimore New York 2010-10-22 -12.0 1064 2010-10-22 07:00:00 -Baltimore New York 2010-10-22 54.0 1142 2010-10-22 07:00:00 -Baltimore New York 2010-10-22 18.0 1599 2010-10-22 07:00:00 -Chicago New York 2010-10-22 2.0 361 2010-10-22 07:00:00 -Chicago New York 2010-10-22 24.0 897 2010-10-22 07:00:00 -Chicago New York 2010-10-22 16.0 1531 2010-10-22 07:00:00 -Chicago New York 2010-10-22 -6.0 1610 2010-10-22 07:00:00 -Chicago New York 2010-10-22 -11.0 3198 2010-10-22 07:00:00 -Cleveland New York 2010-10-22 1.0 2630 2010-10-22 07:00:00 -Cleveland New York 2010-10-22 -25.0 2646 2010-10-22 07:00:00 -Cleveland New York 2010-10-22 -3.0 3014 2010-10-22 07:00:00 -Baltimore New York 2010-10-23 18.0 272 2010-10-23 07:00:00 -Baltimore New York 2010-10-23 -10.0 1805 
2010-10-23 07:00:00 -Baltimore New York 2010-10-23 6.0 3171 2010-10-23 07:00:00 -Chicago New York 2010-10-23 3.0 384 2010-10-23 07:00:00 -Chicago New York 2010-10-23 32.0 426 2010-10-23 07:00:00 -Chicago New York 2010-10-23 1.0 650 2010-10-23 07:00:00 -Chicago New York 2010-10-23 11.0 3085 2010-10-23 07:00:00 -Cleveland New York 2010-10-23 -21.0 2932 2010-10-23 07:00:00 -Washington New York 2010-10-23 -25.0 5832 2010-10-23 07:00:00 -Washington New York 2010-10-23 -21.0 5904 2010-10-23 07:00:00 -Washington New York 2010-10-23 -18.0 5917 2010-10-23 07:00:00 -Washington New York 2010-10-23 -16.0 7274 2010-10-23 07:00:00 -Baltimore New York 2010-10-24 12.0 1599 2010-10-24 07:00:00 -Baltimore New York 2010-10-24 20.0 2571 2010-10-24 07:00:00 -Chicago New York 2010-10-24 10.0 361 2010-10-24 07:00:00 -Chicago New York 2010-10-24 113.0 897 2010-10-24 07:00:00 -Chicago New York 2010-10-24 -5.0 1531 2010-10-24 07:00:00 -Chicago New York 2010-10-24 -17.0 1610 2010-10-24 07:00:00 -Chicago New York 2010-10-24 -3.0 3198 2010-10-24 07:00:00 -Cleveland New York 2010-10-24 5.0 2254 2010-10-24 07:00:00 -Cleveland New York 2010-10-24 -11.0 2630 2010-10-24 07:00:00 -Cleveland New York 2010-10-24 -20.0 2646 2010-10-24 07:00:00 -Cleveland New York 2010-10-24 -9.0 3014 2010-10-24 07:00:00 -Washington New York 2010-10-24 -26.0 7282 2010-10-24 07:00:00 Baltimore New York 2010-10-25 -25.0 1064 2010-10-25 07:00:00 Baltimore New York 2010-10-25 92.0 1142 2010-10-25 07:00:00 Baltimore New York 2010-10-25 106.0 1599 2010-10-25 07:00:00 @@ -1458,19 +1680,42 @@ Cleveland New York 2010-10-25 -4.0 2630 2010-10-25 07:00:00 Cleveland New York 2010-10-25 81.0 2646 2010-10-25 07:00:00 Cleveland New York 2010-10-25 42.0 3014 2010-10-25 07:00:00 Washington New York 2010-10-25 9.0 7291 2010-10-25 07:00:00 -Baltimore New York 2010-10-26 -22.0 1064 2010-10-26 07:00:00 -Baltimore New York 2010-10-26 123.0 1142 2010-10-26 07:00:00 -Baltimore New York 2010-10-26 90.0 1599 2010-10-26 07:00:00 -Chicago New York 2010-10-26 12.0 361 2010-10-26 07:00:00 -Chicago New York 2010-10-26 0.0 897 2010-10-26 07:00:00 -Chicago New York 2010-10-26 29.0 1531 2010-10-26 07:00:00 -Chicago New York 2010-10-26 -17.0 1610 2010-10-26 07:00:00 -Chicago New York 2010-10-26 6.0 3198 2010-10-26 07:00:00 -Cleveland New York 2010-10-26 4.0 2630 2010-10-26 07:00:00 -Cleveland New York 2010-10-26 -27.0 2646 2010-10-26 07:00:00 -Cleveland New York 2010-10-26 -11.0 2662 2010-10-26 07:00:00 -Cleveland New York 2010-10-26 13.0 3014 2010-10-26 07:00:00 -Washington New York 2010-10-26 4.0 7291 2010-10-26 07:00:00 +Baltimore New York 2010-10-24 12.0 1599 2010-10-24 07:00:00 +Baltimore New York 2010-10-24 20.0 2571 2010-10-24 07:00:00 +Chicago New York 2010-10-24 10.0 361 2010-10-24 07:00:00 +Chicago New York 2010-10-24 113.0 897 2010-10-24 07:00:00 +Chicago New York 2010-10-24 -5.0 1531 2010-10-24 07:00:00 +Chicago New York 2010-10-24 -17.0 1610 2010-10-24 07:00:00 +Chicago New York 2010-10-24 -3.0 3198 2010-10-24 07:00:00 +Cleveland New York 2010-10-24 5.0 2254 2010-10-24 07:00:00 +Cleveland New York 2010-10-24 -11.0 2630 2010-10-24 07:00:00 +Cleveland New York 2010-10-24 -20.0 2646 2010-10-24 07:00:00 +Cleveland New York 2010-10-24 -9.0 3014 2010-10-24 07:00:00 +Washington New York 2010-10-24 -26.0 7282 2010-10-24 07:00:00 +Baltimore New York 2010-10-23 18.0 272 2010-10-23 07:00:00 +Baltimore New York 2010-10-23 -10.0 1805 2010-10-23 07:00:00 +Baltimore New York 2010-10-23 6.0 3171 2010-10-23 07:00:00 +Chicago New York 2010-10-23 3.0 384 2010-10-23 07:00:00 +Chicago 
New York 2010-10-23 32.0 426 2010-10-23 07:00:00 +Chicago New York 2010-10-23 1.0 650 2010-10-23 07:00:00 +Chicago New York 2010-10-23 11.0 3085 2010-10-23 07:00:00 +Cleveland New York 2010-10-23 -21.0 2932 2010-10-23 07:00:00 +Washington New York 2010-10-23 -25.0 5832 2010-10-23 07:00:00 +Washington New York 2010-10-23 -21.0 5904 2010-10-23 07:00:00 +Washington New York 2010-10-23 -18.0 5917 2010-10-23 07:00:00 +Washington New York 2010-10-23 -16.0 7274 2010-10-23 07:00:00 +Baltimore New York 2010-10-29 -24.0 1064 2010-10-29 07:00:00 +Baltimore New York 2010-10-29 21.0 1142 2010-10-29 07:00:00 +Baltimore New York 2010-10-29 -2.0 1599 2010-10-29 07:00:00 +Chicago New York 2010-10-29 -12.0 361 2010-10-29 07:00:00 +Chicago New York 2010-10-29 -11.0 897 2010-10-29 07:00:00 +Chicago New York 2010-10-29 15.0 1531 2010-10-29 07:00:00 +Chicago New York 2010-10-29 -18.0 1610 2010-10-29 07:00:00 +Chicago New York 2010-10-29 -4.0 3198 2010-10-29 07:00:00 +Cleveland New York 2010-10-29 -4.0 2630 2010-10-29 07:00:00 +Cleveland New York 2010-10-29 -19.0 2646 2010-10-29 07:00:00 +Cleveland New York 2010-10-29 -12.0 3014 2010-10-29 07:00:00 +Washington New York 2010-10-29 1.0 7291 2010-10-29 07:00:00 Baltimore New York 2010-10-27 -18.0 1064 2010-10-27 07:00:00 Baltimore New York 2010-10-27 49.0 1142 2010-10-27 07:00:00 Baltimore New York 2010-10-27 92.0 1599 2010-10-27 07:00:00 @@ -1494,18 +1739,6 @@ Cleveland New York 2010-10-28 3.0 2630 2010-10-28 07:00:00 Cleveland New York 2010-10-28 -6.0 2646 2010-10-28 07:00:00 Cleveland New York 2010-10-28 1.0 3014 2010-10-28 07:00:00 Washington New York 2010-10-28 45.0 7291 2010-10-28 07:00:00 -Baltimore New York 2010-10-29 -24.0 1064 2010-10-29 07:00:00 -Baltimore New York 2010-10-29 21.0 1142 2010-10-29 07:00:00 -Baltimore New York 2010-10-29 -2.0 1599 2010-10-29 07:00:00 -Chicago New York 2010-10-29 -12.0 361 2010-10-29 07:00:00 -Chicago New York 2010-10-29 -11.0 897 2010-10-29 07:00:00 -Chicago New York 2010-10-29 15.0 1531 2010-10-29 07:00:00 -Chicago New York 2010-10-29 -18.0 1610 2010-10-29 07:00:00 -Chicago New York 2010-10-29 -4.0 3198 2010-10-29 07:00:00 -Cleveland New York 2010-10-29 -4.0 2630 2010-10-29 07:00:00 -Cleveland New York 2010-10-29 -19.0 2646 2010-10-29 07:00:00 -Cleveland New York 2010-10-29 -12.0 3014 2010-10-29 07:00:00 -Washington New York 2010-10-29 1.0 7291 2010-10-29 07:00:00 Baltimore New York 2010-10-30 14.0 272 2010-10-30 07:00:00 Baltimore New York 2010-10-30 -1.0 1805 2010-10-30 07:00:00 Baltimore New York 2010-10-30 5.0 3171 2010-10-30 07:00:00 @@ -1517,6 +1750,28 @@ Cleveland New York 2010-10-30 -23.0 2018 2010-10-30 07:00:00 Cleveland New York 2010-10-30 -12.0 2932 2010-10-30 07:00:00 Washington New York 2010-10-30 -27.0 5904 2010-10-30 07:00:00 Washington New York 2010-10-30 -16.0 5917 2010-10-30 07:00:00 +Baltimore New York 2010-10-20 -30.0 1064 2010-10-20 07:00:00 +Baltimore New York 2010-10-20 23.0 1142 2010-10-20 07:00:00 +Baltimore New York 2010-10-20 6.0 1599 2010-10-20 07:00:00 +Chicago New York 2010-10-20 42.0 361 2010-10-20 07:00:00 +Chicago New York 2010-10-20 24.0 897 2010-10-20 07:00:00 +Chicago New York 2010-10-20 15.0 1531 2010-10-20 07:00:00 +Chicago New York 2010-10-20 -6.0 1610 2010-10-20 07:00:00 +Chicago New York 2010-10-20 -2.0 3198 2010-10-20 07:00:00 +Cleveland New York 2010-10-20 -8.0 2630 2010-10-20 07:00:00 +Cleveland New York 2010-10-20 -15.0 3014 2010-10-20 07:00:00 +Washington New York 2010-10-20 -2.0 7291 2010-10-20 07:00:00 +Baltimore New York 2010-10-22 -12.0 1064 2010-10-22 07:00:00 +Baltimore 
New York 2010-10-22 54.0 1142 2010-10-22 07:00:00 +Baltimore New York 2010-10-22 18.0 1599 2010-10-22 07:00:00 +Chicago New York 2010-10-22 2.0 361 2010-10-22 07:00:00 +Chicago New York 2010-10-22 24.0 897 2010-10-22 07:00:00 +Chicago New York 2010-10-22 16.0 1531 2010-10-22 07:00:00 +Chicago New York 2010-10-22 -6.0 1610 2010-10-22 07:00:00 +Chicago New York 2010-10-22 -11.0 3198 2010-10-22 07:00:00 +Cleveland New York 2010-10-22 1.0 2630 2010-10-22 07:00:00 +Cleveland New York 2010-10-22 -25.0 2646 2010-10-22 07:00:00 +Cleveland New York 2010-10-22 -3.0 3014 2010-10-22 07:00:00 Baltimore New York 2010-10-31 -1.0 1599 2010-10-31 07:00:00 Baltimore New York 2010-10-31 -14.0 2571 2010-10-31 07:00:00 Chicago New York 2010-10-31 -25.0 361 2010-10-31 07:00:00 @@ -1626,26 +1881,67 @@ POSTHOOK: Input: default@flights_tiny_orc_partitioned_timestamp@fl_time=2010-10- 2010-10-29 07:00:00 12 2010-10-30 07:00:00 11 2010-10-31 07:00:00 8 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_timestamp PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_timestamp POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: flights_tiny_orc_partitioned_timestamp + Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: origin_city_name (type: string), dest_city_name (type: string), fl_date (type: date), arr_delay (type: float), fl_num (type: int), fl_time (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Statistics: Num rows: 137 Data size: 5480 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 137 Data size: 5480 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: flights_tiny_orc_partitioned_timestamp - Select Operator - expressions: origin_city_name (type: string), dest_city_name (type: string), fl_date (type: date), arr_delay (type: float), fl_num (type: int), fl_time (type: timestamp) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - ListSink + ListSink PREHOOK: query: select * from flights_tiny_orc_partitioned_timestamp PREHOOK: type: QUERY @@ -1679,17 +1975,19 @@ POSTHOOK: Input: 
default@flights_tiny_orc_partitioned_timestamp@fl_time=2010-10- POSTHOOK: Input: default@flights_tiny_orc_partitioned_timestamp@fl_time=2010-10-30 07%3A00%3A00 POSTHOOK: Input: default@flights_tiny_orc_partitioned_timestamp@fl_time=2010-10-31 07%3A00%3A00 #### A masked pattern was here #### -Baltimore New York 2010-10-20 -30.0 1064 2010-10-20 07:00:00 -Baltimore New York 2010-10-20 23.0 1142 2010-10-20 07:00:00 -Baltimore New York 2010-10-20 6.0 1599 2010-10-20 07:00:00 -Chicago New York 2010-10-20 42.0 361 2010-10-20 07:00:00 -Chicago New York 2010-10-20 24.0 897 2010-10-20 07:00:00 -Chicago New York 2010-10-20 15.0 1531 2010-10-20 07:00:00 -Chicago New York 2010-10-20 -6.0 1610 2010-10-20 07:00:00 -Chicago New York 2010-10-20 -2.0 3198 2010-10-20 07:00:00 -Cleveland New York 2010-10-20 -8.0 2630 2010-10-20 07:00:00 -Cleveland New York 2010-10-20 -15.0 3014 2010-10-20 07:00:00 -Washington New York 2010-10-20 -2.0 7291 2010-10-20 07:00:00 +Baltimore New York 2010-10-26 -22.0 1064 2010-10-26 07:00:00 +Baltimore New York 2010-10-26 123.0 1142 2010-10-26 07:00:00 +Baltimore New York 2010-10-26 90.0 1599 2010-10-26 07:00:00 +Chicago New York 2010-10-26 12.0 361 2010-10-26 07:00:00 +Chicago New York 2010-10-26 0.0 897 2010-10-26 07:00:00 +Chicago New York 2010-10-26 29.0 1531 2010-10-26 07:00:00 +Chicago New York 2010-10-26 -17.0 1610 2010-10-26 07:00:00 +Chicago New York 2010-10-26 6.0 3198 2010-10-26 07:00:00 +Cleveland New York 2010-10-26 4.0 2630 2010-10-26 07:00:00 +Cleveland New York 2010-10-26 -27.0 2646 2010-10-26 07:00:00 +Cleveland New York 2010-10-26 -11.0 2662 2010-10-26 07:00:00 +Cleveland New York 2010-10-26 13.0 3014 2010-10-26 07:00:00 +Washington New York 2010-10-26 4.0 7291 2010-10-26 07:00:00 Baltimore New York 2010-10-21 17.0 1064 2010-10-21 07:00:00 Baltimore New York 2010-10-21 105.0 1142 2010-10-21 07:00:00 Baltimore New York 2010-10-21 28.0 1599 2010-10-21 07:00:00 @@ -1702,41 +2000,6 @@ Cleveland New York 2010-10-21 3.0 2630 2010-10-21 07:00:00 Cleveland New York 2010-10-21 29.0 2646 2010-10-21 07:00:00 Cleveland New York 2010-10-21 72.0 3014 2010-10-21 07:00:00 Washington New York 2010-10-21 22.0 7291 2010-10-21 07:00:00 -Baltimore New York 2010-10-22 -12.0 1064 2010-10-22 07:00:00 -Baltimore New York 2010-10-22 54.0 1142 2010-10-22 07:00:00 -Baltimore New York 2010-10-22 18.0 1599 2010-10-22 07:00:00 -Chicago New York 2010-10-22 2.0 361 2010-10-22 07:00:00 -Chicago New York 2010-10-22 24.0 897 2010-10-22 07:00:00 -Chicago New York 2010-10-22 16.0 1531 2010-10-22 07:00:00 -Chicago New York 2010-10-22 -6.0 1610 2010-10-22 07:00:00 -Chicago New York 2010-10-22 -11.0 3198 2010-10-22 07:00:00 -Cleveland New York 2010-10-22 1.0 2630 2010-10-22 07:00:00 -Cleveland New York 2010-10-22 -25.0 2646 2010-10-22 07:00:00 -Cleveland New York 2010-10-22 -3.0 3014 2010-10-22 07:00:00 -Baltimore New York 2010-10-23 18.0 272 2010-10-23 07:00:00 -Baltimore New York 2010-10-23 -10.0 1805 2010-10-23 07:00:00 -Baltimore New York 2010-10-23 6.0 3171 2010-10-23 07:00:00 -Chicago New York 2010-10-23 3.0 384 2010-10-23 07:00:00 -Chicago New York 2010-10-23 32.0 426 2010-10-23 07:00:00 -Chicago New York 2010-10-23 1.0 650 2010-10-23 07:00:00 -Chicago New York 2010-10-23 11.0 3085 2010-10-23 07:00:00 -Cleveland New York 2010-10-23 -21.0 2932 2010-10-23 07:00:00 -Washington New York 2010-10-23 -25.0 5832 2010-10-23 07:00:00 -Washington New York 2010-10-23 -21.0 5904 2010-10-23 07:00:00 -Washington New York 2010-10-23 -18.0 5917 2010-10-23 07:00:00 -Washington New York 2010-10-23 -16.0 7274 
2010-10-23 07:00:00 -Baltimore New York 2010-10-24 12.0 1599 2010-10-24 07:00:00 -Baltimore New York 2010-10-24 20.0 2571 2010-10-24 07:00:00 -Chicago New York 2010-10-24 10.0 361 2010-10-24 07:00:00 -Chicago New York 2010-10-24 113.0 897 2010-10-24 07:00:00 -Chicago New York 2010-10-24 -5.0 1531 2010-10-24 07:00:00 -Chicago New York 2010-10-24 -17.0 1610 2010-10-24 07:00:00 -Chicago New York 2010-10-24 -3.0 3198 2010-10-24 07:00:00 -Cleveland New York 2010-10-24 5.0 2254 2010-10-24 07:00:00 -Cleveland New York 2010-10-24 -11.0 2630 2010-10-24 07:00:00 -Cleveland New York 2010-10-24 -20.0 2646 2010-10-24 07:00:00 -Cleveland New York 2010-10-24 -9.0 3014 2010-10-24 07:00:00 -Washington New York 2010-10-24 -26.0 7282 2010-10-24 07:00:00 Baltimore New York 2010-10-25 -25.0 1064 2010-10-25 07:00:00 Baltimore New York 2010-10-25 92.0 1142 2010-10-25 07:00:00 Baltimore New York 2010-10-25 106.0 1599 2010-10-25 07:00:00 @@ -1749,19 +2012,42 @@ Cleveland New York 2010-10-25 -4.0 2630 2010-10-25 07:00:00 Cleveland New York 2010-10-25 81.0 2646 2010-10-25 07:00:00 Cleveland New York 2010-10-25 42.0 3014 2010-10-25 07:00:00 Washington New York 2010-10-25 9.0 7291 2010-10-25 07:00:00 -Baltimore New York 2010-10-26 -22.0 1064 2010-10-26 07:00:00 -Baltimore New York 2010-10-26 123.0 1142 2010-10-26 07:00:00 -Baltimore New York 2010-10-26 90.0 1599 2010-10-26 07:00:00 -Chicago New York 2010-10-26 12.0 361 2010-10-26 07:00:00 -Chicago New York 2010-10-26 0.0 897 2010-10-26 07:00:00 -Chicago New York 2010-10-26 29.0 1531 2010-10-26 07:00:00 -Chicago New York 2010-10-26 -17.0 1610 2010-10-26 07:00:00 -Chicago New York 2010-10-26 6.0 3198 2010-10-26 07:00:00 -Cleveland New York 2010-10-26 4.0 2630 2010-10-26 07:00:00 -Cleveland New York 2010-10-26 -27.0 2646 2010-10-26 07:00:00 -Cleveland New York 2010-10-26 -11.0 2662 2010-10-26 07:00:00 -Cleveland New York 2010-10-26 13.0 3014 2010-10-26 07:00:00 -Washington New York 2010-10-26 4.0 7291 2010-10-26 07:00:00 +Baltimore New York 2010-10-24 12.0 1599 2010-10-24 07:00:00 +Baltimore New York 2010-10-24 20.0 2571 2010-10-24 07:00:00 +Chicago New York 2010-10-24 10.0 361 2010-10-24 07:00:00 +Chicago New York 2010-10-24 113.0 897 2010-10-24 07:00:00 +Chicago New York 2010-10-24 -5.0 1531 2010-10-24 07:00:00 +Chicago New York 2010-10-24 -17.0 1610 2010-10-24 07:00:00 +Chicago New York 2010-10-24 -3.0 3198 2010-10-24 07:00:00 +Cleveland New York 2010-10-24 5.0 2254 2010-10-24 07:00:00 +Cleveland New York 2010-10-24 -11.0 2630 2010-10-24 07:00:00 +Cleveland New York 2010-10-24 -20.0 2646 2010-10-24 07:00:00 +Cleveland New York 2010-10-24 -9.0 3014 2010-10-24 07:00:00 +Washington New York 2010-10-24 -26.0 7282 2010-10-24 07:00:00 +Baltimore New York 2010-10-23 18.0 272 2010-10-23 07:00:00 +Baltimore New York 2010-10-23 -10.0 1805 2010-10-23 07:00:00 +Baltimore New York 2010-10-23 6.0 3171 2010-10-23 07:00:00 +Chicago New York 2010-10-23 3.0 384 2010-10-23 07:00:00 +Chicago New York 2010-10-23 32.0 426 2010-10-23 07:00:00 +Chicago New York 2010-10-23 1.0 650 2010-10-23 07:00:00 +Chicago New York 2010-10-23 11.0 3085 2010-10-23 07:00:00 +Cleveland New York 2010-10-23 -21.0 2932 2010-10-23 07:00:00 +Washington New York 2010-10-23 -25.0 5832 2010-10-23 07:00:00 +Washington New York 2010-10-23 -21.0 5904 2010-10-23 07:00:00 +Washington New York 2010-10-23 -18.0 5917 2010-10-23 07:00:00 +Washington New York 2010-10-23 -16.0 7274 2010-10-23 07:00:00 +Baltimore New York 2010-10-29 -24.0 1064 2010-10-29 07:00:00 +Baltimore New York 2010-10-29 21.0 1142 2010-10-29 07:00:00 
+Baltimore New York 2010-10-29 -2.0 1599 2010-10-29 07:00:00 +Chicago New York 2010-10-29 -12.0 361 2010-10-29 07:00:00 +Chicago New York 2010-10-29 -11.0 897 2010-10-29 07:00:00 +Chicago New York 2010-10-29 15.0 1531 2010-10-29 07:00:00 +Chicago New York 2010-10-29 -18.0 1610 2010-10-29 07:00:00 +Chicago New York 2010-10-29 -4.0 3198 2010-10-29 07:00:00 +Cleveland New York 2010-10-29 -4.0 2630 2010-10-29 07:00:00 +Cleveland New York 2010-10-29 -19.0 2646 2010-10-29 07:00:00 +Cleveland New York 2010-10-29 -12.0 3014 2010-10-29 07:00:00 +Washington New York 2010-10-29 1.0 7291 2010-10-29 07:00:00 Baltimore New York 2010-10-27 -18.0 1064 2010-10-27 07:00:00 Baltimore New York 2010-10-27 49.0 1142 2010-10-27 07:00:00 Baltimore New York 2010-10-27 92.0 1599 2010-10-27 07:00:00 @@ -1785,18 +2071,6 @@ Cleveland New York 2010-10-28 3.0 2630 2010-10-28 07:00:00 Cleveland New York 2010-10-28 -6.0 2646 2010-10-28 07:00:00 Cleveland New York 2010-10-28 1.0 3014 2010-10-28 07:00:00 Washington New York 2010-10-28 45.0 7291 2010-10-28 07:00:00 -Baltimore New York 2010-10-29 -24.0 1064 2010-10-29 07:00:00 -Baltimore New York 2010-10-29 21.0 1142 2010-10-29 07:00:00 -Baltimore New York 2010-10-29 -2.0 1599 2010-10-29 07:00:00 -Chicago New York 2010-10-29 -12.0 361 2010-10-29 07:00:00 -Chicago New York 2010-10-29 -11.0 897 2010-10-29 07:00:00 -Chicago New York 2010-10-29 15.0 1531 2010-10-29 07:00:00 -Chicago New York 2010-10-29 -18.0 1610 2010-10-29 07:00:00 -Chicago New York 2010-10-29 -4.0 3198 2010-10-29 07:00:00 -Cleveland New York 2010-10-29 -4.0 2630 2010-10-29 07:00:00 -Cleveland New York 2010-10-29 -19.0 2646 2010-10-29 07:00:00 -Cleveland New York 2010-10-29 -12.0 3014 2010-10-29 07:00:00 -Washington New York 2010-10-29 1.0 7291 2010-10-29 07:00:00 Baltimore New York 2010-10-30 14.0 272 2010-10-30 07:00:00 Baltimore New York 2010-10-30 -1.0 1805 2010-10-30 07:00:00 Baltimore New York 2010-10-30 5.0 3171 2010-10-30 07:00:00 @@ -1808,6 +2082,28 @@ Cleveland New York 2010-10-30 -23.0 2018 2010-10-30 07:00:00 Cleveland New York 2010-10-30 -12.0 2932 2010-10-30 07:00:00 Washington New York 2010-10-30 -27.0 5904 2010-10-30 07:00:00 Washington New York 2010-10-30 -16.0 5917 2010-10-30 07:00:00 +Baltimore New York 2010-10-20 -30.0 1064 2010-10-20 07:00:00 +Baltimore New York 2010-10-20 23.0 1142 2010-10-20 07:00:00 +Baltimore New York 2010-10-20 6.0 1599 2010-10-20 07:00:00 +Chicago New York 2010-10-20 42.0 361 2010-10-20 07:00:00 +Chicago New York 2010-10-20 24.0 897 2010-10-20 07:00:00 +Chicago New York 2010-10-20 15.0 1531 2010-10-20 07:00:00 +Chicago New York 2010-10-20 -6.0 1610 2010-10-20 07:00:00 +Chicago New York 2010-10-20 -2.0 3198 2010-10-20 07:00:00 +Cleveland New York 2010-10-20 -8.0 2630 2010-10-20 07:00:00 +Cleveland New York 2010-10-20 -15.0 3014 2010-10-20 07:00:00 +Washington New York 2010-10-20 -2.0 7291 2010-10-20 07:00:00 +Baltimore New York 2010-10-22 -12.0 1064 2010-10-22 07:00:00 +Baltimore New York 2010-10-22 54.0 1142 2010-10-22 07:00:00 +Baltimore New York 2010-10-22 18.0 1599 2010-10-22 07:00:00 +Chicago New York 2010-10-22 2.0 361 2010-10-22 07:00:00 +Chicago New York 2010-10-22 24.0 897 2010-10-22 07:00:00 +Chicago New York 2010-10-22 16.0 1531 2010-10-22 07:00:00 +Chicago New York 2010-10-22 -6.0 1610 2010-10-22 07:00:00 +Chicago New York 2010-10-22 -11.0 3198 2010-10-22 07:00:00 +Cleveland New York 2010-10-22 1.0 2630 2010-10-22 07:00:00 +Cleveland New York 2010-10-22 -25.0 2646 2010-10-22 07:00:00 +Cleveland New York 2010-10-22 -3.0 3014 2010-10-22 07:00:00 Baltimore New 
York 2010-10-31 -1.0 1599 2010-10-31 07:00:00 Baltimore New York 2010-10-31 -14.0 2571 2010-10-31 07:00:00 Chicago New York 2010-10-31 -25.0 361 2010-10-31 07:00:00 @@ -1816,12 +2112,16 @@ Chicago New York 2010-10-31 -4.0 1531 2010-10-31 07:00:00 Chicago New York 2010-10-31 -22.0 1610 2010-10-31 07:00:00 Chicago New York 2010-10-31 -15.0 3198 2010-10-31 07:00:00 Washington New York 2010-10-31 -18.0 7282 2010-10-31 07:00:00 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1840,46 +2140,102 @@ STAGE PLANS: TableScan alias: flights_tiny_orc_partitioned_timestamp Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: origin_city_name (type: string), dest_city_name (type: string), fl_date (type: date), arr_delay (type: float), fl_num (type: int), fl_time (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: timestamp) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: date), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 0, 1] Statistics: Num rows: 137 Data size: 5480 Basic stats: 
COMPLETE Column stats: PARTIAL Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 1000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: timestamp) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 25 Data size: 1000 Basic stats: COMPLETE Column stats: PARTIAL TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: date), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 0, 1] Statistics: Num rows: 25 Data size: 1000 Basic stats: COMPLETE Column stats: PARTIAL Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 1000 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 25 Data size: 1000 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1949,12 +2305,16 @@ Chicago New York 2010-10-24 113.0 897 2010-10-24 07:00:00 Chicago New York 2010-10-25 -1.0 897 2010-10-25 07:00:00 Chicago New York 2010-10-26 0.0 897 2010-10-26 07:00:00 Chicago New York 2010-10-27 -11.0 897 2010-10-27 07:00:00 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1972,12 +2332,26 @@ STAGE PLANS: TableScan alias: flights_tiny_orc_partitioned_timestamp Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: fl_time (type: timestamp) outputColumnNames: fl_time + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5] Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: 
COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 5 + native: false + projectedOutputColumns: [0] keys: fl_time (type: timestamp) mode: hash outputColumnNames: _col0, _col1 @@ -1986,21 +2360,50 @@ STAGE PLANS: key expressions: _col0 (type: timestamp) sort order: + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 12 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_reduce1.q.out ql/src/test/results/clientpositive/llap/vector_reduce1.q.out index 2e3e800..464e62f 100644 --- ql/src/test/results/clientpositive/llap/vector_reduce1.q.out +++ ql/src/test/results/clientpositive/llap/vector_reduce1.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select b from vectortab2korc order by b PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select b from vectortab2korc order by b POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root 
stage Stage-0 depends on stages: Stage-1 @@ -124,25 +128,59 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: b (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_reduce2.q.out ql/src/test/results/clientpositive/llap/vector_reduce2.q.out index fd10498..f4eb527 100644 --- ql/src/test/results/clientpositive/llap/vector_reduce2.q.out +++ ql/src/test/results/clientpositive/llap/vector_reduce2.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s, i, s2 from vectortab2korc order by s, i, s2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select s, i, s2 from vectortab2korc order by s, i, s2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE 
DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -124,25 +128,59 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: s (type: string), i (type: int), s2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 2, 9] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_reduce3.q.out ql/src/test/results/clientpositive/llap/vector_reduce3.q.out index 530eb58..2cfaed8 100644 --- ql/src/test/results/clientpositive/llap/vector_reduce3.q.out +++ ql/src/test/results/clientpositive/llap/vector_reduce3.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s from vectortab2korc order by s PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization 
expression select s from vectortab2korc order by s POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -124,25 +128,59 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: s (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out index 9571b5b..e17aff5 100644 --- ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out +++ ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out @@ -14,20 +14,24 @@ POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.F POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cdouble, cdecimal1, cdecimal2, 
min(cdecimal1) as min_decimal1 FROM decimal_test WHERE cdecimal1 is not null and cdecimal2 is not null GROUP BY cint, cdouble, cdecimal1, cdecimal2 ORDER BY cint, cdouble, cdecimal1, cdecimal2 LIMIT 50 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cdouble, cdecimal1, cdecimal2, min(cdecimal1) as min_decimal1 FROM decimal_test WHERE cdecimal1 is not null and cdecimal2 is not null GROUP BY cint, cdouble, cdecimal1, cdecimal2 ORDER BY cint, cdouble, cdecimal1, cdecimal2 LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -46,11 +50,25 @@ STAGE PLANS: TableScan alias: decimal_test Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 4)(children: CastDecimalToBoolean(col 2) -> 4:Boolean) -> boolean, SelectColumnIsNotNull(col 4)(children: CastDecimalToBoolean(col 3) -> 4:Boolean) -> boolean) -> boolean predicate: (cdecimal1 is not null and cdecimal2 is not null) (type: boolean) Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(cdecimal1) + Group By Vectorization: + aggregators: VectorUDAFMinDecimal(col 2) -> decimal(20,10) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1, col 2, col 3 + native: false + projectedOutputColumns: [0] keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -59,16 +77,43 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) sort order: ++++ Map-reduce partition columns: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: decimal(20,10)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) + Group 
By Vectorization: + aggregators: VectorUDAFMinDecimal(col 4) -> decimal(20,10) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1, col 2, col 3 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: int), KEY._col1 (type: double), KEY._col2 (type: decimal(20,10)), KEY._col3 (type: decimal(23,14)) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -76,21 +121,43 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) sort order: ++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 3051 Data size: 720036 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: decimal(20,10)) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: decimal(20,10)), KEY.reducesinkkey3 (type: decimal(23,14)), VALUE._col0 (type: decimal(20,10)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] Statistics: Num rows: 3051 Data size: 720036 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 50 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 50 Data size: 11800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 11800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_string_concat.q.out ql/src/test/results/clientpositive/llap/vector_string_concat.q.out index d2672de..b04f99b 100644 --- ql/src/test/results/clientpositive/llap/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/llap/vector_string_concat.q.out @@ -95,32 +95,78 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT s AS `string`, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS `none_z_rtrim_str` FROM over1korc LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT s AS `string`, +POSTHOOK: query: 
EXPLAIN VECTORIZATION EXPRESSION SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS `none_z_rtrim_str` FROM over1korc LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1korc + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Select Operator + expressions: s (type: string), concat(concat(' ', s), ' ') (type: string), concat(concat('|', rtrim(concat(concat(' ', s), ' '))), '|') (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [7, 12, 11] + selectExpressions: StringGroupColConcatStringScalar(col 11, val )(children: StringScalarConcatStringGroupCol(val , col 7) -> 11:String_Family) -> 12:String_Family, StringGroupColConcatStringScalar(col 13, val |)(children: StringScalarConcatStringGroupCol(val |, col 11)(children: StringRTrim(col 13)(children: StringGroupColConcatStringScalar(col 11, val )(children: StringScalarConcatStringGroupCol(val , col 7) -> 11:String_Family) -> 13:String_Family) -> 11:String) -> 13:String_Family) -> 11:String_Family + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 20 Processor Tree: - TableScan - alias: over1korc - Select Operator - expressions: s (type: string), concat(concat(' ', s), ' ') (type: string), concat(concat('|', rtrim(concat(concat(' ', s), ' '))), '|') (type: string) - outputColumnNames: _col0, _col1, _col2 - Limit - Number of rows: 20 - ListSink + ListSink PREHOOK: query: SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, @@ -259,20 +305,24 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] 
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field`
 FROM vectortab2korc
 GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING))
 ORDER BY `field`
 LIMIT 50
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field`
 FROM vectortab2korc
 GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING))
 ORDER BY `field`
 LIMIT 50
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -291,11 +341,25 @@ STAGE PLANS:
                 TableScan
                   alias: vectortab2korc
                   Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
                   Select Operator
                     expressions: concat(concat(concat('Quarter ', UDFToString(UDFToInteger(((UDFToDouble((month(dt) - 1)) / 3.0) + 1.0)))), '-'), UDFToString(year(dt))) (type: string)
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [19]
+                        selectExpressions: StringGroupConcatColCol(col 17, col 18)(children: StringGroupColConcatStringScalar(col 18, val -)(children: StringScalarConcatStringGroupCol(val Quarter , col 17)(children: CastLongToString(col 13)(children: CastDoubleToLong(col 15)(children: DoubleColAddDoubleScalar(col 16, val 1.0)(children: DoubleColDivideDoubleScalar(col 15, val 3.0)(children: CastLongToDouble(col 14)(children: LongColSubtractLongScalar(col 13, val 1)(children: VectorUDFMonthDate(col 12, field MONTH) -> 13:long) -> 14:long) -> 15:double) -> 16:double) -> 15:double) -> 13:long) -> 17:String) -> 18:String_Family) -> 17:String_Family, CastLongToString(col 13)(children: VectorUDFYearDate(col 12, field YEAR) -> 13:long) -> 18:String) -> 19:String_Family
                     Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 19
+                          native: false
+                          projectedOutputColumns: []
                       keys: _col0 (type: string)
                       mode: hash
                       outputColumnNames: _col0
@@ -304,14 +368,40 @@ STAGE PLANS:
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: No TopN IS false
                         Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: []
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0
@@ -319,20 +409,42 @@ STAGE PLANS:
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                   Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
                   TopN Hash Memory Usage: 0.1
         Reducer 3
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string)
                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
                Limit
                  Number of rows: 50
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                  Statistics: Num rows: 50 Data size: 22950 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                    Statistics: Num rows: 50 Data size: 22950 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out
new file mode 100644
index 0000000..c036d69
--- /dev/null
+++ ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out
@@ -0,0 +1,119 @@
+PREHOOK: query: drop table orc_decimal
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table orc_decimal
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table staging
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table staging
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table orc_decimal (id decimal(18,0)) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_decimal
+POSTHOOK: query: create table orc_decimal (id decimal(18,0)) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_decimal
+PREHOOK: query: create table staging (id decimal(18,0))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@staging
+POSTHOOK: query: create table staging (id decimal(18,0))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@staging
+PREHOOK: query: insert into staging values (34324.0), (100000000.0), (200000000.0), (300000000.0)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@staging
+POSTHOOK: query: insert into staging values (34324.0), (100000000.0), (200000000.0), (300000000.0)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@staging
+POSTHOOK: Lineage: staging.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert overwrite table orc_decimal select id from staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@staging
+PREHOOK: Output: default@orc_decimal
+POSTHOOK: query: insert overwrite table orc_decimal select id from staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@orc_decimal
+POSTHOOK: Lineage: orc_decimal.id SIMPLE [(staging)staging.FieldSchema(name:id, type:decimal(18,0), comment:null), ]
+PREHOOK: query: explain vectorization expression
+select * from orc_decimal where id in ('100000000', '200000000')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization expression
+select * from orc_decimal where id in ('100000000', '200000000')
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: orc_decimal
+                  Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (id) IN ('100000000', '200000000') (type: boolean)
+                    Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: id (type: decimal(18,0))
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Predicate expression for FILTER operator: Cannot vectorize IN() - casting a column is not supported. Column type is decimal(18,0) but the common type is string
+                vectorized: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from orc_decimal where id in ('100000000', '200000000')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_decimal
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_decimal where id in ('100000000', '200000000')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_decimal
+#### A masked pattern was here ####
+100000000
+200000000
+PREHOOK: query: drop table orc_decimal
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orc_decimal
+PREHOOK: Output: default@orc_decimal
+POSTHOOK: query: drop table orc_decimal
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orc_decimal
+POSTHOOK: Output: default@orc_decimal
+PREHOOK: query: drop table staging
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@staging
+PREHOOK: Output: default@staging
+POSTHOOK: query: drop table staging
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@staging
diff --git ql/src/test/results/clientpositive/llap/vector_struct_in.q.out ql/src/test/results/clientpositive/llap/vector_struct_in.q.out
index 06b1d71..d583f09 100644
--- ql/src/test/results/clientpositive/llap/vector_struct_in.q.out
+++ ql/src/test/results/clientpositive/llap/vector_struct_in.q.out
@@ -14,7 +14,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Output: default@test_1
 POSTHOOK: Lineage: test_1.id SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 POSTHOOK: Lineage: test_1.lineid SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select * from test_1 where struct(`id`, `lineid`)
 IN (
 struct('two','3'),
@@ -28,7 +28,7 @@ struct('nine','1'),
 struct('ten','1')
 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select * from test_1 where struct(`id`, `lineid`)
 IN (
 struct('two','3'),
@@ -42,15 +42,68 @@ struct('nine','1'),
 struct('ten','1')
 )
 POSTHOOK: type: QUERY
-Stage-0
-  Fetch Operator
-    limit:-1
-    Select Operator [SEL_2]
-      Output:["_col0","_col1"]
-      Filter Operator [FIL_4]
-        predicate:(struct(id,lineid)) IN (const struct('two','3'), const struct('three','1'), const struct('one','1'), const struct('five','2'), const struct('six','1'), const struct('eight','1'), const struct('seven','1'), const struct('nine','1'), const struct('ten','1'))
-        TableScan [TS_0]
-          Output:["id","lineid"]
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: test_1
+                  Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, BYTES], structColumnMap [0, 1]) -> boolean
+                    predicate: (struct(id,lineid)) IN (const struct('two','3'), const struct('three','1'), const struct('one','1'), const struct('five','2'), const struct('six','1'), const struct('eight','1'), const struct('seven','1'), const struct('nine','1'), const struct('ten','1')) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: id (type: string), lineid (type: string)
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1]
+                      Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select * from test_1 where struct(`id`, `lineid`)
 IN (
@@ -84,7 +137,7 @@ POSTHOOK: Input: default@test_1
 #### A masked pattern was here ####
 one 1
 seven 1
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select `id`, `lineid`, struct(`id`, `lineid`)
 IN (
 struct('two','3'),
@@ -98,7 +151,7 @@ struct('nine','1'),
 struct('ten','1')
 ) as b from test_1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select `id`, `lineid`, struct(`id`, `lineid`)
 IN (
 struct('two','3'),
@@ -112,13 +165,62 @@ struct('nine','1'),
 struct('ten','1')
 ) as b from test_1
 POSTHOOK: type: QUERY
-Stage-0
-  Fetch Operator
-    limit:-1
-    Select Operator [SEL_1]
-      Output:["_col0","_col1","_col2"]
-      TableScan [TS_0]
-        Output:["id","lineid"]
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: test_1
+                  Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
+                  Select Operator
+                    expressions: id (type: string), lineid (type: string), (struct(id,lineid)) IN (const struct('two','3'), const struct('three','1'), const struct('one','1'), const struct('five','2'), const struct('six','1'), const struct('eight','1'), const struct('seven','1'), const struct('nine','1'), const struct('ten','1')) (type: boolean)
+                    outputColumnNames: _col0, _col1, _col2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 3]
+                        selectExpressions: StructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, BYTES], structColumnMap [0, 1]) -> 3:boolean
+                    Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`)
 IN (
@@ -168,7 +270,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Output: default@test_2
 POSTHOOK: Lineage: test_2.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 POSTHOOK: Lineage: test_2.lineid EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select * from test_2 where struct(`id`, `lineid`)
 IN (
 struct(2,3),
@@ -182,7 +284,7 @@ struct(9,1),
 struct(10,1)
 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select * from test_2 where struct(`id`, `lineid`)
 IN (
 struct(2,3),
@@ -196,15 +298,68 @@ struct(9,1),
 struct(10,1)
 )
 POSTHOOK: type: QUERY
-Stage-0
-  Fetch Operator
-    limit:-1
-    Select Operator [SEL_2]
-      Output:["_col0","_col1"]
-      Filter Operator [FIL_4]
-        predicate:(struct(id,lineid)) IN (const struct(2,3), const struct(3,1), const struct(1,1), const struct(5,2), const struct(6,1), const struct(8,1), const struct(7,1), const struct(9,1), const struct(10,1))
-        TableScan [TS_0]
-          Output:["id","lineid"]
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: test_2
+                  Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [LONG, LONG], structColumnMap [0, 1]) -> boolean
+                    predicate: (struct(id,lineid)) IN (const struct(2,3), const struct(3,1), const struct(1,1), const struct(5,2), const struct(6,1), const struct(8,1), const struct(7,1), const struct(9,1), const struct(10,1)) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: id (type: int), lineid (type: int)
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1]
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select * from test_2 where struct(`id`, `lineid`)
 IN (
@@ -238,7 +393,7 @@ POSTHOOK: Input: default@test_2
 #### A masked pattern was here ####
 1 1
 7 1
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select `id`, `lineid`, struct(`id`, `lineid`)
 IN (
 struct(2,3),
@@ -252,7 +407,7 @@ struct(9,1),
 struct(10,1)
 ) as b from test_2
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select `id`, `lineid`, struct(`id`, `lineid`)
 IN (
 struct(2,3),
@@ -266,13 +421,62 @@ struct(9,1),
 struct(10,1)
 ) as b from test_2
 POSTHOOK: type: QUERY
-Stage-0
-  Fetch Operator
-    limit:-1
-    Select Operator [SEL_1]
-      Output:["_col0","_col1","_col2"]
-      TableScan [TS_0]
-        Output:["id","lineid"]
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: test_2
+                  Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
+                  Select Operator
+                    expressions: id (type: int), lineid (type: int), (struct(id,lineid)) IN (const struct(2,3), const struct(3,1), const struct(1,1), const struct(5,2), const struct(6,1), const struct(8,1), const struct(7,1), const struct(9,1), const struct(10,1)) (type: boolean)
+                    outputColumnNames: _col0, _col1, _col2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 3]
+                        selectExpressions: StructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [LONG, LONG], structColumnMap [0, 1]) -> 3:boolean
+                    Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`)
 IN (
@@ -322,7 +526,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Output: default@test_3
 POSTHOOK: Lineage: test_3.id SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 POSTHOOK: Lineage: test_3.lineid EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select * from test_3 where struct(`id`, `lineid`)
 IN (
 struct('two',3),
@@ -336,7 +540,7 @@ struct('nine',1),
 struct('ten',1)
 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select * from test_3 where struct(`id`, `lineid`)
 IN (
 struct('two',3),
@@ -350,15 +554,68 @@ struct('nine',1),
 struct('ten',1)
 )
 POSTHOOK: type: QUERY
-Stage-0
-  Fetch Operator
-    limit:-1
-    Select Operator [SEL_2]
-      Output:["_col0","_col1"]
-      Filter Operator [FIL_4]
-        predicate:(struct(id,lineid)) IN (const struct('two',3), const struct('three',1), const struct('one',1), const struct('five',2), const struct('six',1), const struct('eight',1), const struct('seven',1), const struct('nine',1), const struct('ten',1))
-        TableScan [TS_0]
-          Output:["id","lineid"]
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: test_3
+                  Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, LONG], structColumnMap [0, 1]) -> boolean
+                    predicate: (struct(id,lineid)) IN (const struct('two',3), const struct('three',1), const struct('one',1), const struct('five',2), const struct('six',1), const struct('eight',1), const struct('seven',1), const struct('nine',1), const struct('ten',1)) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: id (type: string), lineid (type: int)
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1]
+                      Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select * from test_3 where struct(`id`, `lineid`)
 IN (
@@ -392,7 +649,7 @@ POSTHOOK: Input: default@test_3
 #### A masked pattern was here ####
 one 1
 seven 1
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select `id`, `lineid`, struct(`id`, `lineid`)
 IN (
 struct('two',3),
@@ -406,7 +663,7 @@ struct('nine',1),
 struct('ten',1)
 ) as b from test_3
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select `id`, `lineid`, struct(`id`, `lineid`)
 IN (
 struct('two',3),
@@ -420,13 +677,62 @@ struct('nine',1),
 struct('ten',1)
 ) as b from test_3
 POSTHOOK: type: QUERY
-Stage-0
-  Fetch Operator
-    limit:-1
-    Select Operator [SEL_1]
-      Output:["_col0","_col1","_col2"]
-      TableScan [TS_0]
-        Output:["id","lineid"]
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: test_3
+                  Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
+                  Select Operator
+                    expressions: id (type: string), lineid (type: int), (struct(id,lineid)) IN (const struct('two',3), const struct('three',1), const struct('one',1), const struct('five',2), const struct('six',1), const struct('eight',1), const struct('seven',1), const struct('nine',1), const struct('ten',1)) (type: boolean)
+                    outputColumnNames: _col0, _col1, _col2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 3]
+                        selectExpressions: StructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, LONG], structColumnMap [0, 1]) -> 3:boolean
+                    Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`)
 IN (
@@ -477,7 +783,7 @@ POSTHOOK: Output: default@test_4
 POSTHOOK: Lineage: test_4.my_bigint EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 POSTHOOK: Lineage: test_4.my_double EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
 POSTHOOK: Lineage: test_4.my_string SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
 IN (
 struct(1L, "a", 1.5D),
@@ -492,7 +798,7 @@ struct(1L, "a", 0.5D),
 struct(3L, "b", 1.5D)
 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
 IN (
 struct(1L, "a", 1.5D),
@@ -507,15 +813,68 @@ struct(1L, "a", 0.5D),
 struct(3L, "b", 1.5D)
 )
 POSTHOOK: type: QUERY
-Stage-0
-  Fetch Operator
-    limit:-1
-    Select Operator [SEL_2]
-      Output:["_col0","_col1","_col2"]
-      Filter Operator [FIL_4]
-        predicate:(struct(my_bigint,my_string,my_double)) IN (const struct(1,'a',1.5), const struct(1,'b',-0.5), const struct(3,'b',1.5), const struct(1,'d',1.5), const struct(1,'c',1.5), const struct(1,'b',2.5), const struct(1,'b',0.5), const struct(5,'b',1.5), const struct(1,'a',0.5), const struct(3,'b',1.5))
-        TableScan [TS_0]
-          Output:["my_bigint","my_string","my_double"]
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: test_4
+                  Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1, col 2], fieldVectorColumnTypes [LONG, BYTES, DOUBLE], structColumnMap [0, 1, 2]) -> boolean
+                    predicate: (struct(my_bigint,my_string,my_double)) IN (const struct(1,'a',1.5), const struct(1,'b',-0.5), const struct(3,'b',1.5), const struct(1,'d',1.5), const struct(1,'c',1.5), const struct(1,'b',2.5), const struct(1,'b',0.5), const struct(5,'b',1.5), const struct(1,'a',0.5), const struct(3,'b',1.5)) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double)
+                      outputColumnNames: _col0, _col1, _col2
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1, 2]
+                      Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
 IN (
@@ -550,7 +909,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test_4
 #### A masked pattern was here ####
 1 a 0.5
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
 IN (
 struct(1L, "a", 1.5D),
@@ -565,7 +924,7 @@ struct(1L, "a", 0.5D),
 struct(3L, "b", 1.5D)
 ) as b from test_4
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
 IN (
 struct(1L, "a", 1.5D),
@@ -580,13 +939,62 @@ struct(1L, "a", 0.5D),
 struct(3L, "b", 1.5D)
 ) as b from test_4
 POSTHOOK: type: QUERY
-Stage-0
-  Fetch Operator
-    limit:-1
-    Select Operator [SEL_1]
-      Output:["_col0","_col1","_col2","_col3"]
-      TableScan [TS_0]
-        Output:["my_bigint","my_string","my_double"]
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: test_4
+                  Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2]
+                  Select Operator
+                    expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double), (struct(my_bigint,my_string,my_double)) IN (const struct(1,'a',1.5), const struct(1,'b',-0.5), const struct(3,'b',1.5), const struct(1,'d',1.5), const struct(1,'c',1.5), const struct(1,'b',2.5), const struct(1,'b',0.5), const struct(5,'b',1.5), const struct(1,'a',0.5), const struct(3,'b',1.5)) (type: boolean)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 4]
+                        selectExpressions: StructColumnInList(structExpressions [col 0, col 1, col 2], fieldVectorColumnTypes [LONG, BYTES, DOUBLE], structColumnMap [0, 1, 2]) -> 4:boolean
+                    Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
 IN (
diff --git ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out
new file mode 100644
index 0000000..56fb85c
--- /dev/null
+++ ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out
@@ -0,0 +1,409 @@
+PREHOOK: query: explain vectorization expression
+select 'key1', 'value1' from alltypesorc tablesample (1 rows)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization expression
+select 'key1', 'value1' from alltypesorc tablesample (1 rows)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Row Limit Per Split: 1
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Select Operator
+                    expressions: 'key1' (type: string), 'value1' (type: string)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [12, 13]
+                        selectExpressions: ConstantVectorExpression(val key1) -> 12:string, ConstantVectorExpression(val value1) -> 13:string
+                    Statistics: Num rows: 12288 Data size: 2187264 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 12288 Data size: 2187264 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select 'key1', 'value1' from alltypesorc tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select 'key1', 'value1' from alltypesorc tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+_c0 _c1
+key1 value1
+PREHOOK: query: create table decimal_2 (t decimal(18,9)) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@decimal_2
+POSTHOOK: query: create table decimal_2 (t decimal(18,9)) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@decimal_2
+PREHOOK: query: explain vectorization expression
+insert overwrite table decimal_2
+  select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization expression
+insert overwrite table decimal_2
+  select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Row Limit Per Split: 1
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Select Operator
+                    expressions: 17.29 (type: decimal(18,9))
+                    outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [12]
+                        selectExpressions: ConstantVectorExpression(val 17.29) -> 12:decimal(18,9)
+                    Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                          serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                          name: default.decimal_2
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.decimal_2
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+PREHOOK: query: insert overwrite table decimal_2
+  select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@decimal_2
+POSTHOOK: query: insert overwrite table decimal_2
+  select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@decimal_2
+POSTHOOK: Lineage: decimal_2.t EXPRESSION []
+_col0
+PREHOOK: query: select count(*) from decimal_2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from decimal_2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_2
+#### A masked pattern was here ####
+_c0
+1
+PREHOOK: query: drop table decimal_2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@decimal_2
+PREHOOK: Output: default@decimal_2
+POSTHOOK: query: drop table decimal_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@decimal_2
+POSTHOOK: Output: default@decimal_2
+PREHOOK: query: explain vectorization expression
+select count(1) from (select * from (Select 1 a) x order by x.a) y
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization expression
+select count(1) from (select * from (Select 1 a) x order by x.a) y
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: _dummy_table
+                  Row Limit Per Split: 1
+                  Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: 1 (type: int)
+                      sort order: +
+                      Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+#### A masked pattern was here ####
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Select Operator
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: []
+                Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count(1)
+                  Group By Vectorization:
+                      aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 1:long) -> bigint
+                      className: VectorGroupByOperator
+                      vectorOutput: true
+                      native: false
+                      projectedOutputColumns: [0]
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    sort order: 
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: Uniform Hash IS false
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: bigint)
+        Reducer 3
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(1) from (select * from (Select 1 a) x order by x.a) y
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from (select * from (Select 1 a) x order by x.a) y
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+_c0
+1
+PREHOOK: query: explain vectorization expression
+create temporary table dual as select 1
+PREHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: query: explain vectorization expression
+create temporary table dual as select 1
+POSTHOOK: type: CREATETABLE_AS_SELECT
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-2, Stage-0
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: _dummy_table
+                  Row Limit Per Split: 1
+                  Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: 1 (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.dual
+            Execution mode: llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+#### A masked pattern was here ####
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-4
+      Create Table Operator:
+        Create Table
+          columns: _c0 int
+          input format: org.apache.hadoop.mapred.TextInputFormat
+#### A masked pattern was here ####
+          output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+          serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          name: default.dual
+          isTemporary: true
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+  Stage: Stage-0
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: create temporary table dual as select 1
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dual
+POSTHOOK: query: create temporary table dual as select 1
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dual
+_c0
+PREHOOK: query: select * from dual
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dual
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dual
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dual
+#### A masked pattern was here ####
+dual._c0
+1
diff --git ql/src/test/results/clientpositive/llap/vector_udf2.q.out ql/src/test/results/clientpositive/llap/vector_udf2.q.out
new file mode 100644
index 0000000..d344345
--- /dev/null
+++ ql/src/test/results/clientpositive/llap/vector_udf2.q.out
@@ -0,0 +1,188 @@
+PREHOOK: query: drop table varchar_udf_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table varchar_udf_2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table varchar_udf_2 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@varchar_udf_2
+POSTHOOK: query: create table varchar_udf_2 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@varchar_udf_2
+PREHOOK: query: insert overwrite table varchar_udf_2
+  select key, value, key, value from src where key = '238' limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@varchar_udf_2
+POSTHOOK: query: insert overwrite table varchar_udf_2
+  select key, value, key, value from src where key = '238' limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@varchar_udf_2
+POSTHOOK: Lineage: varchar_udf_2.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_udf_2.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_udf_2.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_udf_2.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain vectorization expression
+select
+  c1 LIKE '%38%',
+  c2 LIKE 'val_%',
+  c3 LIKE '%38',
+  c1 LIKE '%3x8%',
+  c2 LIKE 'xval_%',
+  c3 LIKE '%x38'
+from varchar_udf_2 limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization expression
+select
+  c1 LIKE '%38%',
+  c2 LIKE 'val_%',
+  c3 LIKE '%38',
+  c1 LIKE '%3x8%',
+  c2 LIKE 'xval_%',
+  c3 LIKE '%x38'
+from varchar_udf_2 limit 1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: varchar_udf_2
+                  Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
+                  Select Operator
+                    expressions: (c1 like '%38%') (type: boolean), (c2 like 'val_%') (type: boolean), (c3 like '%38') (type: boolean), (c1 like '%3x8%') (type: boolean), (c2 like 'xval_%') (type: boolean), (c3 like '%x38') (type: boolean)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [4, 5, 6, 7, 8, 9]
+                        selectExpressions: SelectStringColLikeStringScalar(col 0) -> 4:String_Family, SelectStringColLikeStringScalar(col 1) -> 5:String_Family, SelectStringColLikeStringScalar(col 2) -> 6:String_Family, SelectStringColLikeStringScalar(col 0) -> 7:String_Family, SelectStringColLikeStringScalar(col 1) -> 8:String_Family, SelectStringColLikeStringScalar(col 2) -> 9:String_Family
+                    Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+                    Limit
+                      Number of rows: 1
+                      Limit Vectorization:
+                          className: VectorLimitOperator
+                          native: true
+                      Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select
+  c1 LIKE '%38%',
+  c2 LIKE 'val_%',
+  c3 LIKE '%38',
+  c1 LIKE '%3x8%',
+  c2 LIKE 'xval_%',
+  c3 LIKE '%x38'
+from varchar_udf_2 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_udf_2
+#### A masked pattern was here ####
+POSTHOOK: query: select
+  c1 LIKE '%38%',
+  c2 LIKE 'val_%',
+  c3 LIKE '%38',
+  c1 LIKE '%3x8%',
+  c2 LIKE 'xval_%',
+  c3 LIKE '%x38'
+from varchar_udf_2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_udf_2
+#### A masked pattern was here ####
+true true true false false false
+PREHOOK: query: drop table varchar_udf_2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_udf_2
+PREHOOK: Output: default@varchar_udf_2
+POSTHOOK: query: drop table varchar_udf_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_udf_2
+POSTHOOK: Output: default@varchar_udf_2
+PREHOOK: query: create temporary table HIVE_14349 (a string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@HIVE_14349
+POSTHOOK: query: create temporary table HIVE_14349 (a string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@HIVE_14349
+PREHOOK: query: insert into HIVE_14349 values('XYZa'), ('badXYZa')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@hive_14349
+POSTHOOK: query: insert into HIVE_14349 values('XYZa'), ('badXYZa')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@hive_14349
+POSTHOOK: Lineage: hive_14349.a SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: select * from HIVE_14349 where a LIKE 'XYZ%a%'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive_14349
+#### A masked pattern was here ####
+POSTHOOK: query: select * from HIVE_14349 where a LIKE 'XYZ%a%'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive_14349
+#### A masked pattern was here ####
+XYZa
+PREHOOK: query: insert into HIVE_14349 values ('XYZab'), ('XYZabBAD'), ('badXYZab'), ('badXYZabc')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@hive_14349
+POSTHOOK: query: insert into HIVE_14349 values ('XYZab'), ('XYZabBAD'), ('badXYZab'), ('badXYZabc')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@hive_14349
+POSTHOOK: Lineage: hive_14349.a SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: select * from HIVE_14349 where a LIKE 'XYZ%a_'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive_14349
+#### A masked pattern was here ####
+POSTHOOK: query: select * from HIVE_14349 where a LIKE 'XYZ%a_'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive_14349
+#### A masked pattern was here ####
+XYZab
+PREHOOK: query: drop table HIVE_14349
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@hive_14349
+PREHOOK: Output: default@hive_14349
+POSTHOOK: query: drop table HIVE_14349
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@hive_14349
+POSTHOOK: Output: default@hive_14349
diff --git ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out
index 7d14256..5979f8b 100644
--- ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out
+++ ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out
@@ -121,12 +121,16 @@ POSTHOOK: query: create table varchar_lazy_binary_columnar(vt varchar(10), vsi v
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@varchar_lazy_binary_columnar
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
 Stage-1 is a root stage
 Stage-2 depends on stages: Stage-1
@@ -143,12 +147,23 @@ STAGE PLANS:
         TableScan
           alias: vectortab2korc
           Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+          TableScan Vectorization:
+              native: true
+              projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
          Select Operator
            expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50))
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+            Select Vectorization:
+                className: VectorSelectOperator
+                native: true
+                projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19]
+                selectExpressions: CastLongToVarChar(col 0, maxLength 10) -> 13:VarChar, CastLongToVarChar(col 1, maxLength 10) -> 14:VarChar, CastLongToVarChar(col 2, maxLength 20) -> 15:VarChar, CastLongToVarChar(col 3, maxLength 30) -> 16:VarChar, VectorUDFAdaptor(CAST( f AS varchar(20))) -> 17:varchar(20), VectorUDFAdaptor(CAST( d AS varchar(20))) -> 18:varchar(20), CastStringGroupToVarChar(col 8, maxLength 50) -> 19:VarChar
            Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+              File Sink Vectorization:
+                  className: VectorFileSinkOperator
+                  native: false
              Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -157,6 +172,14 @@ STAGE PLANS:
              name: default.varchar_lazy_binary_columnar
     Execution mode: vectorized, llap
     LLAP IO: all inputs
+    Map Vectorization:
+        enabled: true
+        enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+        groupByVectorOutput: true
+        inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+        allNative: false
+        usesVectorUDFAdaptor: true
+        vectorized: true
 
 Stage: Stage-2
   Dependency Collection
diff --git ql/src/test/results/clientpositive/llap/vector_varchar_mapjoin1.q.out ql/src/test/results/clientpositive/llap/vector_varchar_mapjoin1.q.out
index 68dd80f..1fc3df0 100644
--- ql/src/test/results/clientpositive/llap/vector_varchar_mapjoin1.q.out
+++ ql/src/test/results/clientpositive/llap/vector_varchar_mapjoin1.q.out
@@ -124,10 +124,14 @@ POSTHOOK: Output: database:default
 POSTHOOK: Output: default@varchar_join1_str_orc
 POSTHOOK: Lineage: varchar_join1_str_orc.c1 SIMPLE [(varchar_join1_str)varchar_join1_str.FieldSchema(name:c1, type:int, comment:null), ]
 POSTHOOK: Lineage: varchar_join1_str_orc.c2 SIMPLE [(varchar_join1_str)varchar_join1_str.FieldSchema(name:c2, type:string, comment:null), ]
-PREHOOK: query: explain select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1
+PREHOOK: query: explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1
+POSTHOOK: query: explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -170,6 +174,14 @@ STAGE PLANS:
                         value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: varchar(10))
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3
             Map Operator Tree:
                 TableScan
@@ -190,8 +202,23 @@ STAGE PLANS:
                         value expressions: _col0 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(10)), VALUE._col1 (type: int), VALUE._col2 (type: varchar(10))
@@ -222,10 +249,14 @@ POSTHOOK: Input: default@varchar_join1_vc1_orc
 1	abc	1	abc
 2	abc	2	abc
 3	abc	3	abc
-PREHOOK: query: explain select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1
+PREHOOK: query: explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1
+POSTHOOK: query: explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -268,6 +299,14 @@ STAGE PLANS:
                         value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: varchar(20))
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3
             Map Operator Tree:
                 TableScan
@@ -288,8 +327,23 @@ STAGE PLANS:
                         value expressions: _col0 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(10)), VALUE._col1 (type: int), VALUE._col2 (type: varchar(20))
@@ -322,10 +376,14 @@ POSTHOOK: Input: default@varchar_join1_vc2_orc
 1	abc	1	abc
 2	abc	2	abc
 3	abc	3	abc
-PREHOOK: query: explain select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1
+PREHOOK: query: explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1
+POSTHOOK: query: explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -368,6 +426,14 @@ STAGE PLANS:
                         value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3
             Map Operator Tree:
                 TableScan
@@ -388,8 +454,23 @@ STAGE PLANS:
                         value expressions: _col0 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(10)), VALUE._col1 (type: int), VALUE._col2 (type: string)
diff --git ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
index 74861a6..a50b8f1 100644
--- ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
+++ ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
@@ -45,16 +45,20 @@ POSTHOOK: Input: default@src
 0	val_0
 10	val_10
 100	val_100
-PREHOOK: query: explain select key, value
+PREHOOK: query: explain vectorization select key, value
 from varchar_2
 order by key asc
 limit 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select key, value
+POSTHOOK: query: explain vectorization select key, value
 from varchar_2
 order by key asc
 limit 5
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -84,8 +88,23 @@ STAGE PLANS:
                       value expressions: _col1 (type: varchar(20))
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20))
@@ -146,16 +165,20 @@ POSTHOOK: Input: default@src
 97	val_97
 97	val_97
 96	val_96
-PREHOOK: query: explain select key, value
+PREHOOK: query: explain vectorization select key, value
 from varchar_2
 order by key desc
 limit 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select key, value
+POSTHOOK: query: explain vectorization select key, value
 from varchar_2
 order by key desc
 limit 5
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -185,8 +208,23 @@ STAGE PLANS:
                       value expressions: _col1 (type: varchar(20))
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20))
@@ -248,12 +286,16 @@ POSTHOOK: query: create table varchar_3 (
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@varchar_3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 insert into table varchar_3 select cint from alltypesorc limit 10
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 insert into table varchar_3 select cint from alltypesorc limit 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -273,36 +315,81 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Select Operator
                     expressions: cint (type: int)
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [2]
                     Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
                     Limit
                       Number of rows: 10
+                      Limit Vectorization:
+                          className: VectorLimitOperator
+                          native: true
                       Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                         Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col0 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col0 (type: int)
                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 10
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                  Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: CAST( _col0 AS varchar(25)) (type: varchar(25))
                    outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [1]
+                        selectExpressions: CastLongToVarChar(col 0, maxLength 25) -> 1:VarChar
                    Statistics: Num rows: 10 Data size: 872 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                      Statistics: Num rows: 10 Data size: 872 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
index 092a2ea..e0ee99c 100644
--- ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
+++ ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
@@ -14,12 +14,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Output: default@count_case_groupby
 POSTHOOK: Lineage: count_case_groupby.bool EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
 POSTHOOK: Lineage: count_case_groupby.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -37,12 +41,27 @@ STAGE PLANS:
                 TableScan
                   alias: count_case_groupby
                   Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: key (type: string), CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END (type: int)
                     outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 3]
+                        selectExpressions: VectorUDFAdaptor(CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END)(children: NotCol(col 1) -> 2:boolean) -> 3:int
                     Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count(_col1)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCount(col 3) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 0
+                          native: false
+                          projectedOutputColumns: [0]
                       keys: _col0 (type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1
@@ -51,21 +70,50 @@ STAGE PLANS:
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 1) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: [0]
                keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/llap/vectorization_0.q.out ql/src/test/results/clientpositive/llap/vectorization_0.q.out
index 64ac81c..67fcdaa 100644
--- ql/src/test/results/clientpositive/llap/vectorization_0.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_0.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ctinyint) as c1,
        MAX(ctinyint),
        COUNT(ctinyint),
@@ -6,7 +6,7 @@ SELECT MIN(ctinyint) as c1,
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ctinyint) as c1,
        MAX(ctinyint),
        COUNT(ctinyint),
@@ -14,6 +14,10 @@ SELECT MIN(ctinyint) as c1,
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -32,43 +36,101 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Select Operator
                     expressions: ctinyint (type: tinyint)
                     outputColumnNames: ctinyint
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0]
                     Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       aggregations: min(ctinyint), max(ctinyint), count(ctinyint), count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0, 1, 2, 3]
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3
                       Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFMaxLong(col 1) -> tinyint, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0, 1, 2, 3]
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3
                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: tinyint)
                  sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                  value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint)
        Reducer 3
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: tinyint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
                outputColumnNames: _col0, _col1, _col2, _col3
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3]
                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -100,16 +162,20 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -64	62	9173	12288
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(ctinyint) as c1
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(ctinyint) as c1
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -128,42 +194,100 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Select Operator
                     expressions: ctinyint (type: tinyint)
                     outputColumnNames: ctinyint
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0]
                     Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       aggregations: sum(ctinyint)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFSumLong(col 0) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0]
                      mode: hash
                      outputColumnNames: _col0
                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: bigint)
                  sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
        Reducer 3
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: bigint)
                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -189,7 +313,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -39856
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION
 SELECT
    avg(ctinyint) as c1,
    variance(ctinyint),
@@ -202,7 +326,7 @@ SELECT
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION
 SELECT
    avg(ctinyint) as c1,
    variance(ctinyint),
@@ -215,6 +339,10 @@ SELECT
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -248,8 +376,21 @@ STAGE PLANS:
                        value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -263,6 +404,13 @@ STAGE PLANS:
                  value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
        Reducer 3
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double)
@@ -311,7 +459,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -4.344925324321378	1158.3003004768184	1158.3003004768184	1158.4265870337827	34.033811136527426	34.033811136527426	34.033811136527426	34.03566639620536
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(cbigint) as c1,
        MAX(cbigint),
        COUNT(cbigint),
@@ -319,7 +467,7 @@ SELECT MIN(cbigint) as c1,
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(cbigint) as c1,
        MAX(cbigint),
        COUNT(cbigint),
@@ -327,6 +475,10 @@ SELECT MIN(cbigint) as c1,
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -345,43 +497,101 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Select Operator
                     expressions: cbigint (type: bigint)
                     outputColumnNames: cbigint
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3]
                     Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       aggregations: min(cbigint), max(cbigint), count(cbigint), count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFMinLong(col 3) -> bigint, VectorUDAFMaxLong(col 3) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0, 1, 2, 3]
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2, _col3
                      Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMinLong(col 0) -> bigint, VectorUDAFMaxLong(col 1) -> bigint, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0, 1, 2, 3]
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3
                Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: bigint)
                  sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
                  Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                  value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
        Reducer 3
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
                outputColumnNames: _col0, _col1, _col2, _col3
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3]
                Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -413,16 +623,20 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -2147311592	2145498388	9173	12288
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(cbigint) as c1
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(cbigint) as c1
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -441,42 +655,100 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Select Operator
                     expressions: cbigint (type: bigint)
                     outputColumnNames: cbigint
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3]
                     Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       aggregations: sum(cbigint)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFSumLong(col 3) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0]
                      mode: hash
                      outputColumnNames: _col0
                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: bigint)
                  sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
        Reducer 3
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: bigint)
                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -502,7 +774,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -1698460028409
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION
 SELECT
    avg(cbigint) as c1,
    variance(cbigint),
@@ -515,7 +787,7 @@ SELECT
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION
 SELECT
    avg(cbigint) as c1,
    variance(cbigint),
@@ -528,6 +800,10 @@ SELECT
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -561,8 +837,21 @@ STAGE PLANS:
                        value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -576,6 +865,13 @@ STAGE PLANS:
                  value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
        Reducer 3
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double)
@@ -624,7 +920,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -1.8515862077935246E8	2.07689300543081907E18	2.07689300543081907E18	2.07711944383088768E18	1.441142951074188E9	1.441142951074188E9	1.441142951074188E9	1.4412215110214279E9
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(cfloat) as c1,
        MAX(cfloat),
        COUNT(cfloat),
@@ -632,7 +928,7 @@ SELECT MIN(cfloat) as c1,
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(cfloat) as c1,
        MAX(cfloat),
        COUNT(cfloat),
@@ -640,6 +936,10 @@ SELECT MIN(cfloat) as c1,
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -658,43 +958,101 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Select Operator
                     expressions: cfloat (type: float)
                     outputColumnNames: cfloat
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [4]
                     Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       aggregations: min(cfloat), max(cfloat), count(cfloat), count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFMinDouble(col 4) -> float, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFCount(col 4) -> bigint, VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0, 1, 2, 3]
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2, _col3
                      Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMinDouble(col 0) -> float, VectorUDAFMaxDouble(col 1) -> float, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0, 1, 2, 3]
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3
                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: float)
                  sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                  value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint)
        Reducer 3
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: float), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
                outputColumnNames: _col0, _col1, _col2, _col3
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3]
                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -726,16 +1084,20 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -64.0	79.553	9173	12288
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(cfloat) as c1
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(cfloat) as c1
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -754,42 +1116,100 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Select Operator
                     expressions: cfloat (type: float)
                     outputColumnNames: cfloat
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [4]
                     Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       aggregations: sum(cfloat)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFSumDouble(col 4) -> double
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0]
                      mode: hash
                      outputColumnNames: _col0
                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: double)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumDouble(col 0) -> double
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: double)
                  sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
        Reducer 3
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: double)
                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -815,7 +1235,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -39479.635992884636
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION
 SELECT
    avg(cfloat) as c1,
    variance(cfloat),
@@ -828,7 +1248,7 @@ SELECT
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION
 SELECT
    avg(cfloat) as c1,
    variance(cfloat),
@@ -841,6 +1261,10 @@ SELECT
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -874,8 +1298,21 @@ STAGE PLANS:
                        value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -889,6 +1326,13 @@ STAGE PLANS:
                  value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
        Reducer 3
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double)
@@ -938,7 +1382,7 @@ POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -4.303895780321011	1163.8972588604984	1163.8972588604984	1164.0241556397025	34.115938487171924	34.115938487171924	34.115938487171924	34.11779822379666
 WARNING: Comparing a bigint and a double may result in a loss of precision.
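> Reviewer note: the same notVectorizedReason repeats for every avg/variance query above. The cause is visible in the value expressions: these UDAFs ship a struct-typed partial result (roughly a sum plus a count) from the map side, and at this point vectorized GROUP BY can only consume and emit primitive columns, so the merging reducer falls back to row mode while the final ORDER BY reducer stays vectorized. A hypothetical sketch of that two-phase shape (not Hive's GenericUDAFAverage, just the idea):

```java
// Why avg trips the reducer: its partial state is a composite value,
// not a single primitive column. Hypothetical class for illustration.
public class AvgPartial {
  double sum;   // partial sum from one map task
  long count;   // partial row count from the same map task

  void merge(AvgPartial other) {  // reducer-side merge of map outputs
    sum += other.sum;
    count += other.count;
  }

  double finish() {               // final average once all partials merge
    return count == 0 ? 0d : sum / count;
  }

  public static void main(String[] args) {
    AvgPartial a = new AvgPartial(); a.sum = 10; a.count = 4;  // map task 1
    AvgPartial b = new AvgPartial(); b.sum = 2;  b.count = 1;  // map task 2
    a.merge(b);
    System.out.println(a.finish());  // prints 2.4
  }
}
```

MIN/MAX/COUNT/SUM stay fully vectorized in the plans above precisely because their partial state is a single primitive (tinyint, bigint, float, or double) that fits one vector column.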
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT AVG(cbigint),
        (-(AVG(cbigint))),
        (-6432 + AVG(cbigint)),
@@ -965,7 +1409,7 @@ WHERE (((cstring2 LIKE '%b%')
        AND ((cboolean2 = 1)
            AND (3569 = ctinyint))))
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT AVG(cbigint),
        (-(AVG(cbigint))),
        (-6432 + AVG(cbigint)),
@@ -992,6 +1436,10 @@ WHERE (((cstring2 LIKE '%b%')
        AND ((cboolean2 = 1)
            AND (3569 = ctinyint))))
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1009,15 +1457,33 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 12)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean, FilterDoubleColLessDoubleColumn(col 13, col 5)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0, col 1)(children: col 0) -> boolean, FilterLongColEqualLongScalar(col 11, val 1) -> boolean, FilterLongScalarEqualLongColumn(val 3569, col 0)(children: col 0) -> boolean) -> boolean) -> boolean
                    predicate: ((cstring2 like '%b%') or (79.553 <> CAST( cint AS decimal(13,3))) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (3569 = UDFToInteger(ctinyint)))) (type: boolean)
                    Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint)
                      outputColumnNames: cbigint, cfloat, ctinyint
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [3, 4, 0]
                      Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE
                      Group By Operator
                        aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFAvgLong(col 3) -> struct, VectorUDAFStdPopLong(col 3) -> struct, VectorUDAFVarSampLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFMinLong(col 0) -> tinyint
+                            className: VectorGroupByOperator
+                            vectorOutput: false
+                            native: false
+                            projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                            vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false
                        mode: hash
                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                        Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1027,8 +1493,21 @@ STAGE PLANS:
                          value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint)
             Execution mode: vectorized, llap
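> Reviewer note: the predicateExpression above shows how a vectorized OR filter is composed from child filter expressions. Conceptually, each child keeps the rows it passes from the batch's current selection, and the OR is the union of the survivors. A simplified sketch of that mechanism (plain Java over arrays, not the real VectorizedRowBatch API):

```java
import java.util.BitSet;
import java.util.function.IntPredicate;

// Simplified model of FilterExprOrExpr over one batch of rows:
// children evaluate against the same input selection; the OR filter
// keeps the union of the rows any child accepted.
public class OrFilterSketch {
  static BitSet filter(BitSet selected, IntPredicate pred) {
    BitSet out = new BitSet();
    selected.stream().filter(pred).forEach(out::set);  // rows passing child
    return out;
  }

  public static void main(String[] args) {
    int n = 8;
    long[] cbigint  = {5, -3, 9, 0, 7, -1, 2, 4};
    double[] cdouble = {1, 10, 2, 3, 0,  5, 9, 1};
    BitSet selected = new BitSet();
    selected.set(0, n);  // all rows of the batch initially selected

    // OR of two hypothetical children, e.g. (cbigint < cdouble) or (cbigint = 9)
    BitSet a = filter(selected, i -> cbigint[i] < cdouble[i]);
    BitSet b = filter(selected, i -> cbigint[i] == 9);
    a.or(b);  // union of survivors = rows passing the OR
    System.out.println("surviving rows: " + a);
  }
}
```

Evaluating each child against the original selection (rather than chaining them) is what makes OR semantics correct; chaining would compute an AND instead.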
LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5) @@ -29934,23 +30413,109 @@ POSTHOOK: query: explain extended select * from alltypesorc where (cint=45 and cfloat=3.02) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean) + Statistics: Num rows: 6 Data size: 1630 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 6 Data size: 1630 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1630 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types tinyint:smallint:int:bigint:float:double:string:string:timestamp:timestamp:boolean:boolean + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypesorc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"cbigint":"true","cboolean1":"true","cboolean2":"true","cdouble":"true","cfloat":"true","cint":"true","csmallint":"true","cstring1":"true","cstring2":"true","ctimestamp1":"true","ctimestamp2":"true","ctinyint":"true"}} + bucket_count -1 + column.name.delimiter , + columns ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1,cstring2,ctimestamp1,ctimestamp2,cboolean1,cboolean2 + columns.comments + columns.types tinyint:smallint:int:bigint:float:double:string:string:timestamp:timestamp:boolean:boolean +#### A masked pattern was here #### + name default.alltypesorc + numFiles 1 + numRows 12288 + rawDataSize 2641964 + serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 377237 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cbigint":"true","cboolean1":"true","cboolean2":"true","cdouble":"true","cfloat":"true","cint":"true","csmallint":"true","cstring1":"true","cstring2":"true","ctimestamp1":"true","ctimestamp2":"true","ctinyint":"true"}} + bucket_count -1 + column.name.delimiter , + columns ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1,cstring2,ctimestamp1,ctimestamp2,cboolean1,cboolean2 + columns.comments + columns.types tinyint:smallint:int:bigint:float:double:string:string:timestamp:timestamp:boolean:boolean +#### A masked pattern was here #### + name default.alltypesorc + numFiles 1 + numRows 12288 + rawDataSize 2641964 + serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 377237 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypesorc + name: default.alltypesorc + Truncated Path -> Alias: + /alltypesorc [alltypesorc] + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypesorc - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean) - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - ListSink + ListSink PREHOOK: query: explain extended select * from alltypesorc where (cint=49 and cfloat=3.5) or @@ -29963,23 +30528,109 @@ POSTHOOK: query: explain extended select * from alltypesorc where (cint=45 and cfloat=3.02) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on 
stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean) + Statistics: Num rows: 6 Data size: 1630 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 6 Data size: 1630 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1630 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types tinyint:smallint:int:bigint:float:double:string:string:timestamp:timestamp:boolean:boolean + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypesorc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cbigint":"true","cboolean1":"true","cboolean2":"true","cdouble":"true","cfloat":"true","cint":"true","csmallint":"true","cstring1":"true","cstring2":"true","ctimestamp1":"true","ctimestamp2":"true","ctinyint":"true"}} + bucket_count -1 + column.name.delimiter , + columns ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1,cstring2,ctimestamp1,ctimestamp2,cboolean1,cboolean2 + columns.comments + columns.types tinyint:smallint:int:bigint:float:double:string:string:timestamp:timestamp:boolean:boolean +#### A masked pattern was here #### + name default.alltypesorc + numFiles 1 + numRows 12288 + rawDataSize 2641964 + serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 377237 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output 
format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cbigint":"true","cboolean1":"true","cboolean2":"true","cdouble":"true","cfloat":"true","cint":"true","csmallint":"true","cstring1":"true","cstring2":"true","ctimestamp1":"true","ctimestamp2":"true","ctinyint":"true"}} + bucket_count -1 + column.name.delimiter , + columns ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1,cstring2,ctimestamp1,ctimestamp2,cboolean1,cboolean2 + columns.comments + columns.types tinyint:smallint:int:bigint:float:double:string:string:timestamp:timestamp:boolean:boolean +#### A masked pattern was here #### + name default.alltypesorc + numFiles 1 + numRows 12288 + rawDataSize 2641964 + serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 377237 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypesorc + name: default.alltypesorc + Truncated Path -> Alias: + /alltypesorc [alltypesorc] + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypesorc - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean) - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - ListSink + ListSink PREHOOK: query: explain extended select * from alltypesorc where (cint=49 or cfloat=3.5) and @@ -29992,23 +30643,109 @@ POSTHOOK: query: explain extended select * from alltypesorc where (cint=45 or cfloat=3.02) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (((cint = 49) or (cfloat = 3.5)) and ((cint = 47) or (cfloat = 2.09)) and ((cint = 45) or (cfloat = 3.02))) (type: boolean) + Statistics: Num rows: 30 Data size: 7690 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 30 Data size: 7690 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was 
here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 30 Data size: 7690 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types tinyint:smallint:int:bigint:float:double:string:string:timestamp:timestamp:boolean:boolean + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypesorc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cbigint":"true","cboolean1":"true","cboolean2":"true","cdouble":"true","cfloat":"true","cint":"true","csmallint":"true","cstring1":"true","cstring2":"true","ctimestamp1":"true","ctimestamp2":"true","ctinyint":"true"}} + bucket_count -1 + column.name.delimiter , + columns ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1,cstring2,ctimestamp1,ctimestamp2,cboolean1,cboolean2 + columns.comments + columns.types tinyint:smallint:int:bigint:float:double:string:string:timestamp:timestamp:boolean:boolean +#### A masked pattern was here #### + name default.alltypesorc + numFiles 1 + numRows 12288 + rawDataSize 2641964 + serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 377237 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cbigint":"true","cboolean1":"true","cboolean2":"true","cdouble":"true","cfloat":"true","cint":"true","csmallint":"true","cstring1":"true","cstring2":"true","ctimestamp1":"true","ctimestamp2":"true","ctinyint":"true"}} + bucket_count -1 + column.name.delimiter , + columns ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1,cstring2,ctimestamp1,ctimestamp2,cboolean1,cboolean2 + columns.comments + columns.types tinyint:smallint:int:bigint:float:double:string:string:timestamp:timestamp:boolean:boolean +#### A masked pattern was here #### + name default.alltypesorc + numFiles 1 + numRows 12288 + rawDataSize 2641964 + serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 377237 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypesorc + name: default.alltypesorc + Truncated Path -> Alias: + /alltypesorc [alltypesorc] + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypesorc - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (((cint = 49) or (cfloat = 3.5)) and ((cint = 47) or (cfloat = 2.09)) and ((cint = 45) or (cfloat = 3.02))) (type: boolean) - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - ListSink + ListSink PREHOOK: query: explain extended select count(*),cstring1 from alltypesorc where cstring1='biology' or cstring1='history' diff --git ql/src/test/results/clientpositive/llap/vectorization_13.q.out ql/src/test/results/clientpositive/llap/vectorization_13.q.out index 71aa76f..87bac36 100644 --- ql/src/test/results/clientpositive/llap/vectorization_13.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_13.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, ctinyint, ctimestamp1, @@ -31,7 +31,7 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, ctinyint, ctimestamp1, @@ -64,6 +64,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -82,15 +86,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2028982 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 11.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 12.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > 11.0) and (UDFToDouble(ctimestamp2) <> 12.0) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) Statistics: Num rows: 5461 
Data size: 901772 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 0, 8, 4, 6] Statistics: Num rows: 5461 Data size: 901772 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFMinLong(col 0) -> tinyint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 10, col 0, col 8, col 4, col 6 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -103,8 +126,21 @@ STAGE PLANS: value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) @@ -123,16 +159,33 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: 
double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20] Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 40 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -253,7 +306,7 @@ NULL -63 1969-12-31 16:00:15.436 -63.0 NULL 63 -63 0 -63.0 -0.0 63.0 -5011.839 0 NULL -64 1969-12-31 16:00:11.912 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64 NULL -64 1969-12-31 16:00:12.339 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64 NULL -64 1969-12-31 16:00:13.274 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, ctinyint, ctimestamp1, @@ -286,7 +339,7 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, ctinyint, ctimestamp1, @@ -319,6 +372,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -337,15 +394,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2028982 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val -1.388)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val -1.3359999999999999)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: CastLongToDecimal(col 0) -> 
13:decimal(11,4)) -> boolean) -> boolean) -> boolean predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -1.388) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) Statistics: Num rows: 5461 Data size: 901772 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 0, 8, 4, 6] Statistics: Num rows: 5461 Data size: 901772 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFMinLong(col 0) -> tinyint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 10, col 0, col 8, col 4, col 6 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -358,8 +434,21 @@ STAGE PLANS: value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) @@ -378,16 +467,33 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: 
tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20] Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 40 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorization_14.q.out ql/src/test/results/clientpositive/llap/vectorization_14.q.out index 8ba8413..541d13f 100644 --- ql/src/test/results/clientpositive/llap/vectorization_14.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_14.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT ctimestamp1, cfloat, cstring1, @@ -31,7 +31,7 @@ WHERE (((ctinyint <= cbigint) GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble ORDER BY cstring1, cfloat, cdouble, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT ctimestamp1, cfloat, cstring1, @@ -64,6 +64,10 @@ WHERE (((ctinyint <= cbigint) GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble ORDER BY cstring1, cfloat, cdouble, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -103,8 +107,21 @@ STAGE PLANS: value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), 
var_pop(VALUE._col4), var_samp(VALUE._col5) @@ -123,6 +140,13 @@ STAGE PLANS: value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey3 (type: timestamp), KEY.reducesinkkey1 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: float), VALUE._col6 (type: float), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double), VALUE._col16 (type: double), VALUE._col17 (type: double) diff --git ql/src/test/results/clientpositive/llap/vectorization_15.q.out ql/src/test/results/clientpositive/llap/vectorization_15.q.out index 04cd902..02b221d 100644 --- ql/src/test/results/clientpositive/llap/vectorization_15.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_15.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cfloat, cboolean1, cdouble, @@ -29,7 +29,7 @@ WHERE (((cstring2 LIKE '%ss%') GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cfloat, cboolean1, cdouble, @@ -60,6 +60,10 @@ WHERE (((cstring2 LIKE '%ss%') GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -99,8 +103,21 @@ STAGE PLANS: value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator 
Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), min(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), stddev_pop(VALUE._col5) @@ -119,6 +136,13 @@ STAGE PLANS: value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) diff --git ql/src/test/results/clientpositive/llap/vectorization_16.q.out ql/src/test/results/clientpositive/llap/vectorization_16.q.out index 622ac88..686b16c 100644 --- ql/src/test/results/clientpositive/llap/vectorization_16.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_16.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -18,7 +18,7 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -38,6 +38,10 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -76,8 +80,21 @@ STAGE PLANS: value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) diff --git ql/src/test/results/clientpositive/llap/vectorization_17.q.out 
ql/src/test/results/clientpositive/llap/vectorization_17.q.out index 94e17b0..9ea8483 100644 --- ql/src/test/results/clientpositive/llap/vectorization_17.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_17.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cfloat, cstring1, cint, @@ -22,7 +22,7 @@ WHERE (((cbigint > -23) OR (cfloat = cdouble)))) ORDER BY cbigint, cfloat PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cfloat, cstring1, cint, @@ -46,6 +46,10 @@ WHERE (((cbigint > -23) OR (cfloat = cdouble)))) ORDER BY cbigint, cfloat POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -77,8 +81,23 @@ STAGE PLANS: value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(11,4)), VALUE._col11 (type: double) diff --git ql/src/test/results/clientpositive/llap/vectorization_7.q.out ql/src/test/results/clientpositive/llap/vectorization_7.q.out index f5e3e25..70c544d 100644 --- ql/src/test/results/clientpositive/llap/vectorization_7.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_7.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, cbigint, csmallint, @@ -25,7 +25,7 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, cbigint, csmallint, @@ -52,6 +52,10 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -69,32 +73,74 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + 
projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val -15.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean) Statistics: Num rows: 7281 Data size: 1789382 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22] + selectExpressions: LongColAddLongColumn(col 3, col 3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false 
Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14] Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -188,7 +234,7 @@ NULL NULL -7196 -61 1969-12-31 15:59:44.823 NULL NULL 0 7196 61 78 NULL NULL 61 NULL NULL -7196 1 1969-12-31 15:59:48.361 NULL NULL 0 7196 -1 16 NULL NULL -1 0 NULL NULL -7196 14 1969-12-31 15:59:50.291 NULL NULL 0 7196 -14 3 NULL NULL -14 0 NULL NULL -7196 22 1969-12-31 15:59:52.699 NULL NULL 0 7196 -22 -5 NULL NULL -22 0 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, cbigint, csmallint, @@ -215,7 +261,7 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, cbigint, csmallint, @@ -242,6 +288,10 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -259,32 +309,74 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter 
Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 7.6850000000000005)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) Statistics: Num rows: 7281 Data size: 1789382 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22] + selectExpressions: LongColAddLongColumn(col 3, col 3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 7281 Data size: 1231410 
Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14] Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorization_8.q.out ql/src/test/results/clientpositive/llap/vectorization_8.q.out index f1bb8c3..ae5e632 100644 --- ql/src/test/results/clientpositive/llap/vectorization_8.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_8.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, @@ -23,7 +23,7 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, @@ -48,6 +48,10 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -65,32 +69,74 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2983078 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + 
predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 12, val 10.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 16.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -6432.0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, FilterDoubleColEqualDoubleScalar(col 5, val 988888.0) -> boolean) -> boolean) -> boolean predicate: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 10.0) and (UDFToDouble(ctimestamp2) <> 16.0)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean) Statistics: Num rows: 3060 Data size: 743036 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 5, 10, 6, 4, 12, 13, 14, 16, 18, 15, 17, 19, 21] + selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -257.0) -> 14:double, DoubleColAddDoubleColumn(col 15, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 15:double) -> 16:double, DoubleColAddDoubleColumn(col 15, col 17)(children: DoubleColUnaryMinus(col 5) -> 15:double, CastLongToDouble(col 3) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 5) -> 15:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4) -> 17:double, DoubleColUnaryMinus(col 4) -> 19:double, DoubleColAddDoubleColumn(col 20, col 22)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 20:double, col 22) -> 21:double Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13] Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -175,7 +221,7 @@ POSTHOOK: Input: default@alltypesorc 1969-12-31 15:59:43.783 -200.0 NULL NULL -11.0 200.0 -5438.15 51400.0 NULL 1.2116287E7 200.0 9.611 11.0 NULL 1969-12-31 15:59:43.807 -7196.0 NULL NULL 42.0 7196.0 1557.8500000000004 1849372.0 NULL -5.98226333E8 7196.0 -43.389 -42.0 NULL 1969-12-31 15:59:43.82 -7196.0 NULL NULL -30.0 7196.0 1557.8500000000004 1849372.0 NULL 1.329550715E9 7196.0 28.611 30.0 NULL -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, @@ -200,7 +246,7 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, @@ -225,6 +271,10 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -242,32 +292,74 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2983078 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7) -> 
boolean, FilterDoubleColLessEqualDoubleScalar(col 12, val 12.503)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 11.998)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -6432.0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, FilterDoubleColEqualDoubleScalar(col 5, val 988888.0) -> boolean) -> boolean) -> boolean predicate: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 12.503) and (UDFToDouble(ctimestamp2) <> 11.998)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean) Statistics: Num rows: 3060 Data size: 743036 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 5, 10, 6, 4, 12, 13, 14, 16, 18, 15, 17, 19, 21] + selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -257.0) -> 14:double, DoubleColAddDoubleColumn(col 15, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 15:double) -> 16:double, DoubleColAddDoubleColumn(col 15, col 17)(children: DoubleColUnaryMinus(col 5) -> 15:double, CastLongToDouble(col 3) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 5) -> 15:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4) -> 17:double, DoubleColUnaryMinus(col 4) -> 19:double, DoubleColAddDoubleColumn(col 20, col 22)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 20:double, col 22) -> 21:double Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13] Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorization_9.q.out ql/src/test/results/clientpositive/llap/vectorization_9.q.out index 622ac88..686b16c 100644 --- ql/src/test/results/clientpositive/llap/vectorization_9.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_9.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -18,7 +18,7 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -38,6 +38,10 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -76,8 +80,21 @@ STAGE PLANS: value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not 
supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) diff --git ql/src/test/results/clientpositive/llap/vectorization_decimal_date.q.out ql/src/test/results/clientpositive/llap/vectorization_decimal_date.q.out index 9a6cb52..74455f5 100644 --- ql/src/test/results/clientpositive/llap/vectorization_decimal_date.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_decimal_date.q.out @@ -12,28 +12,78 @@ POSTHOOK: Lineage: date_decimal_test.cdate EXPRESSION [(alltypesorc)alltypesorc. POSTHOOK: Lineage: date_decimal_test.cdecimal EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: date_decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: date_decimal_test.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: date_decimal_test + Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean + predicate: (cint is not null and cdouble is not null) (type: boolean) + Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cdate (type: date), cdecimal (type: decimal(20,10)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3] + Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: date_decimal_test - Filter Operator - predicate: (cint is not null and cdouble is not null) (type: boolean) - Select Operator - expressions: cdate (type: date), cdecimal (type: decimal(20,10)) - outputColumnNames: _col0, _col1 - Limit - Number of rows: 10 - ListSink + ListSink PREHOOK: query: SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/vectorization_offset_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_offset_limit.q.out new file mode 100644 index 0000000..d5a1f47 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vectorization_offset_limit.q.out @@ -0,0 +1,188 @@ +WARNING: Comparing a bigint and a double may result in a loss of precision. +PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 183488 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean) + Statistics: Num rows: 1365 Data size: 20400 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cbigint (type: bigint), cdouble (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 2 + Offset of rows: 3 + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: 2 + Processor Tree: + ListSink + +WARNING: Comparing a bigint and a double may result in a loss of precision. 
+PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-1887561756 10361.0 +-1887561756 -8881.0 +PREHOOK: query: explain vectorization expression +select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 146796 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1] + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: double) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: smallint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), 
KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 3 + Limit Vectorization: + className: VectorLimitOperator + native: true + Offset of rows: 10 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 3 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 diff --git ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out index 2078e81..872e7f3 100644 --- ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out @@ -46,10 +46,14 @@ POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cstring2 SIMPLE [(alltype POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] -PREHOOK: query: explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 +PREHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 +POSTHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -78,8 +82,23 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double) diff --git ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out index 4a59aa4..6a99fc3 100644 --- ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out @@ -1,8 +1,12 @@ WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble +PREHOOK: query: explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble +POSTHOOK: query: explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -39,8 +43,21 @@ STAGE PLANS: value expressions: _col0 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0) diff --git ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out index 908e8ab..fe257aa 100644 --- ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: EXPLAIN SELECT AVG(cint), +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT AVG(cint), (AVG(cint) + -3728), (-((AVG(cint) + -3728))), (-((-((AVG(cint) + -3728))))), @@ -34,7 +35,8 @@ WHERE ((762 = cbigint) AND ((79.553 != cint) AND (cboolean2 != cboolean1))))) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT AVG(cint), +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT AVG(cint), (AVG(cint) + -3728), (-((AVG(cint) + -3728))), (-((-((AVG(cint) + -3728))))), @@ -70,6 +72,10 @@ WHERE ((762 = cbigint) AND ((79.553 != cint) AND (cboolean2 != cboolean1))))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -87,15 +93,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 
2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val 762, col 3) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 1) -> 12:double) -> boolean, FilterDoubleColGreaterDoubleScalar(col 12, val -5.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 2) -> 12:double) -> boolean) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val a) -> boolean, FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 13, val -1.389)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean, FilterStringGroupColNotEqualStringScalar(col 7, val a) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 14)(children: CastLongToDecimal(col 2) -> 14:decimal(13,3)) -> boolean, FilterLongColNotEqualLongColumn(col 11, col 10) -> boolean) -> boolean) -> boolean predicate: ((762 = cbigint) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint))) or (cstring1 = 'a') or ((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and (cboolean2 <> cboolean1))) (type: boolean) Statistics: Num rows: 5466 Data size: 1157380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint) outputColumnNames: cint, cdouble, csmallint, cfloat, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 5, 1, 4, 0] Statistics: Num rows: 5466 Data size: 1157380 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(col 2) -> struct, VectorUDAFSumDouble(col 5) -> double, VectorUDAFStdPopLong(col 2) -> struct, VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFAvgDouble(col 4) -> struct, VectorUDAFStdSampLong(col 2) -> struct, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFCount(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE @@ -105,8 +129,21 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: double), 
_col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8) @@ -208,7 +245,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 1.6000018929276082E8 1.5999646129276082E8 -1.5999646129276082E8 1.5999646129276082E8 2.5598867626205912E16 -8706342.964000002 -1.6000018929276082E8 5.481251832900256E8 4.095728233294762E24 8549.657499338187 -5.481251832900256E8 3.8812872199726474E8 2.12743126884874112E17 3.0054786945575034E17 -5.700752675298234 -3.0054786945575034E17 3.0054786945575034E17 973579.3664121237 5.48222463472403E8 -973579.3664121237 -18.377427808018613 -64 2044 -6.573680812059066E-5 18.377427808018613 -PREHOOK: query: EXPLAIN SELECT MAX(cint), +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT MAX(cint), (MAX(cint) / -3728), (MAX(cint) * -3728), VAR_POP(cbigint), @@ -241,7 +279,8 @@ WHERE (((cbigint <= 197) OR ((cfloat > 79.553) AND (cstring2 LIKE '10%'))) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT MAX(cint), +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT MAX(cint), (MAX(cint) / -3728), (MAX(cint) * -3728), VAR_POP(cbigint), @@ -274,6 +313,10 @@ WHERE (((cbigint <= 197) OR ((cfloat > 79.553) AND (cstring2 LIKE '10%'))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -291,15 +334,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2036734 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 3, val 197) -> boolean, FilterLongColLessLongColumn(col 2, col 3)(children: col 2) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -26.28) -> boolean, FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 1) -> 12:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 0) -> 12:double) -> boolean, FilterStringColRegExpStringScalar(col 6, pattern .*ss.*) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 
4, val 79.5530014038086) -> boolean, FilterStringColLikeStringScalar(col 7, pattern 10%) -> boolean) -> boolean) -> boolean predicate: (((cbigint <= 197) and (UDFToLong(cint) < cbigint)) or ((cdouble >= -26.28) and (UDFToDouble(csmallint) > cdouble)) or ((UDFToFloat(ctinyint) > cfloat) and cstring1 regexp '.*ss.*') or ((cfloat > 79.553) and (cstring2 like '10%'))) (type: boolean) Statistics: Num rows: 6826 Data size: 1131534 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cbigint (type: bigint), csmallint (type: smallint), cdouble (type: double), ctinyint (type: tinyint) outputColumnNames: cint, cbigint, csmallint, cdouble, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 1, 5, 0] Statistics: Num rows: 6826 Data size: 1131534 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(cint), var_pop(cbigint), stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), stddev_samp(csmallint), var_samp(cint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 2) -> int, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFMaxDouble(col 5) -> double, VectorUDAFAvgLong(col 0) -> struct, VectorUDAFMinLong(col 2) -> int, VectorUDAFMinDouble(col 5) -> double, VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFVarSampLong(col 2) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: COMPLETE @@ -309,8 +370,21 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: int), _col6 (type: double), _col7 (type: struct), _col8 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF var_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), var_pop(VALUE._col1), stddev_pop(VALUE._col2), max(VALUE._col3), avg(VALUE._col4), min(VALUE._col5), min(VALUE._col6), stddev_samp(VALUE._col7), var_samp(VALUE._col8) @@ -406,7 +480,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc 
#### A masked pattern was here #### -20301111 5445.576984978541 -1626869520 7.9684972882908944E16 1626869520 NULL -563 NULL NULL NULL -8.935323383084578 NULL -1069736047 NULL NULL NULL NULL NULL -5445.576984978541 511 5454.512308361625 1626869520 7.2647256545687792E16 -PREHOOK: query: EXPLAIN SELECT VAR_POP(cbigint), +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), (VAR_POP(cbigint) - (-(VAR_POP(cbigint)))), COUNT(*), @@ -438,7 +513,8 @@ WHERE ((ctimestamp1 = ctimestamp2) AND ((ctimestamp2 IS NOT NULL) AND (cstring2 > 'a')))) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT VAR_POP(cbigint), +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), (VAR_POP(cbigint) - (-(VAR_POP(cbigint)))), COUNT(*), @@ -470,6 +546,10 @@ WHERE ((ctimestamp1 = ctimestamp2) AND ((ctimestamp2 IS NOT NULL) AND (cstring2 > 'a')))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -487,15 +567,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterTimestampColEqualTimestampColumn(col 8, col 9) -> boolean, FilterDoubleScalarEqualDoubleColumn(val 762.0, col 4) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val ss) -> boolean, FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 1, col 3)(children: col 1) -> boolean, FilterLongScalarEqualLongColumn(val 1, col 11) -> boolean) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, SelectColumnIsNotNull(col 9) -> boolean, FilterStringGroupColGreaterStringScalar(col 7, val a) -> boolean) -> boolean) -> boolean predicate: ((ctimestamp1 = ctimestamp2) or (762 = cfloat) or (cstring1 = 'ss') or ((UDFToLong(csmallint) <= cbigint) and (1 = cboolean2)) or (cboolean1 is not null and ctimestamp2 is not null and (cstring2 > 'a'))) (type: boolean) Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cbigint (type: bigint), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cdouble (type: double) outputColumnNames: cbigint, ctinyint, csmallint, cint, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 0, 1, 2, 5] Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFMaxLong(col 2) -> int, VectorUDAFStdSampDouble(col 5) -> struct, VectorUDAFCount(col 0) -> bigint, VectorUDAFAvgLong(col 0) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarPopLong(col 3) -> struct output 
type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE @@ -505,8 +603,21 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: tinyint), _col3 (type: struct), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF var_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: var_pop(VALUE._col0), count(VALUE._col1), max(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7) @@ -601,7 +712,8 @@ POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 2.5109214708345636E18 -2.5109214708345636E18 5.0218429416691272E18 2780 75.198 62 2.5109214708345661E18 2.5109214708345636E18 -1.0 2780 -2780 9460.675803068349 -2.5109214708345636E18 -2118360 1072872630 -2118298 -2.5109214697616911E18 185935.34910862707 0 758 -1.733509234828496 -3728 WARNING: Comparing a bigint and a double may result in a loss of precision. 
-PREHOOK: query: EXPLAIN SELECT AVG(ctinyint), +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT AVG(ctinyint), (AVG(ctinyint) + 6981), ((AVG(ctinyint) + 6981) + AVG(ctinyint)), MAX(cbigint), @@ -623,7 +735,8 @@ WHERE (((ctimestamp2 <= ctimestamp1) AND (ctimestamp1 >= 0)) OR (cfloat = 17)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT AVG(ctinyint), +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT AVG(ctinyint), (AVG(ctinyint) + 6981), ((AVG(ctinyint) + 6981) + AVG(ctinyint)), MAX(cbigint), @@ -645,6 +758,10 @@ WHERE (((ctimestamp2 <= ctimestamp1) AND (ctimestamp1 >= 0)) OR (cfloat = 17)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -662,15 +779,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2139070 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterTimestampColLessEqualTimestampColumn(col 9, col 8) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 3) -> 12:double) -> boolean, FilterStringScalarLessEqualStringGroupColumn(val ss, col 6) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColLessLongColumn(col 1, col 0)(children: col 0) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean) -> boolean, FilterDoubleColEqualDoubleScalar(col 4, val 17.0) -> boolean) -> boolean predicate: (((ctimestamp2 <= ctimestamp1) and (UDFToDouble(cbigint) <> cdouble) and ('ss' <= cstring1)) or ((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0)) or (cfloat = 17)) (type: boolean) Statistics: Num rows: 2835 Data size: 493648 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), cbigint (type: bigint), cint (type: int), cfloat (type: float) outputColumnNames: ctinyint, cbigint, cint, cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3, 2, 4] Statistics: Num rows: 2835 Data size: 493648 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(col 0) -> struct, VectorUDAFMaxLong(col 3) -> bigint, VectorUDAFStdSampLong(col 2) -> struct, VectorUDAFVarPopLong(col 2) -> struct, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFMaxDouble(col 4) -> float + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 
Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE @@ -680,8 +815,21 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), max(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_pop(VALUE._col4), max(VALUE._col5) @@ -756,7 +904,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -0.5934409161894847 6980.406559083811 6979.813118167622 2141851355 -11761.597368421053 -6980.406559083811 1.5852855222071937E8 -0.5934409161894847 2.5099887741860852E16 1.52140608502098816E18 -2141851355 -13.510823917813237 79.553 -3.998255191435157E19 -PREHOOK: query: EXPLAIN SELECT cint, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cdouble, ctimestamp2, cstring1, @@ -792,7 +941,8 @@ WHERE (((cstring1 RLIKE 'a.*') ORDER BY cint, cdouble, ctimestamp2, cstring1, cboolean2, ctinyint, cfloat, ctimestamp1, csmallint, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13 LIMIT 50 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cdouble, ctimestamp2, cstring1, @@ -828,6 +978,10 @@ WHERE (((cstring1 RLIKE 'a.*') ORDER BY cint, cdouble, ctimestamp2, cstring1, cboolean2, ctinyint, cfloat, ctimestamp1, csmallint, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13 LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -845,32 +999,74 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 3056470 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterStringColRegExpStringScalar(col 6, pattern a.*) -> boolean, FilterStringColLikeStringScalar(col 7, pattern %ss%) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val 1, col 11) -> boolean, FilterDecimalColLessDecimalScalar(col 12, val 79.553)(children: CastLongToDecimal(col 1) -> 12:decimal(8,3)) -> boolean, FilterLongScalarNotEqualLongColumn(val -257, col 0)(children: col 0) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 0) -> 13:double) -> boolean, FilterDoubleColGreaterEqualDoubleColumn(col 4, col 13)(children: CastLongToFloatViaLongToDouble(col 2) -> 
13:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColLessLongColumn(col 2, col 3)(children: col 2) -> boolean, FilterLongColGreaterLongColumn(col 0, col 3)(children: col 0) -> boolean) -> boolean) -> boolean predicate: ((cstring1 regexp 'a.*' and (cstring2 like '%ss%')) or ((1 <> cboolean2) and (CAST( csmallint AS decimal(8,3)) < 79.553) and (-257 <> UDFToInteger(ctinyint))) or ((cdouble > UDFToDouble(ctinyint)) and (cfloat >= UDFToFloat(cint))) or ((UDFToLong(cint) < cbigint) and (UDFToLong(ctinyint) > cbigint))) (type: boolean) Statistics: Num rows: 9898 Data size: 2462086 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cdouble (type: double), ctimestamp2 (type: timestamp), cstring1 (type: string), cboolean2 (type: boolean), ctinyint (type: tinyint), cfloat (type: float), ctimestamp1 (type: timestamp), csmallint (type: smallint), cbigint (type: bigint), (-3728 * cbigint) (type: bigint), (- cint) (type: int), (-863.257 - CAST( cint AS decimal(10,0))) (type: decimal(14,3)), (- csmallint) (type: smallint), (csmallint - (- csmallint)) (type: smallint), ((csmallint - (- csmallint)) + (- csmallint)) (type: smallint), (UDFToDouble(cint) / UDFToDouble(cint)) (type: double), ((-863.257 - CAST( cint AS decimal(10,0))) - -26.28) (type: decimal(15,3)), (- cfloat) (type: float), (cdouble * -89010.0) (type: double), (UDFToDouble(ctinyint) / 988888.0) (type: double), (- ctinyint) (type: tinyint), (79.553 / CAST( ctinyint AS decimal(3,0))) (type: decimal(9,7)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 5, 9, 6, 11, 0, 4, 8, 1, 3, 14, 15, 17, 18, 20, 22, 24, 26, 13, 23, 28, 19, 30] + selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 3) -> 14:long, LongColUnaryMinus(col 2) -> 15:long, DecimalScalarSubtractDecimalColumn(val -863.257, col 16)(children: CastLongToDecimal(col 2) -> 16:decimal(10,0)) -> 17:decimal(14,3), LongColUnaryMinus(col 1) -> 18:long, LongColSubtractLongColumn(col 1, col 19)(children: LongColUnaryMinus(col 1) -> 19:long) -> 20:long, LongColAddLongColumn(col 21, col 19)(children: LongColSubtractLongColumn(col 1, col 19)(children: LongColUnaryMinus(col 1) -> 19:long) -> 21:long, LongColUnaryMinus(col 1) -> 19:long) -> 22:long, DoubleColDivideDoubleColumn(col 13, col 23)(children: CastLongToDouble(col 2) -> 13:double, CastLongToDouble(col 2) -> 23:double) -> 24:double, DecimalColSubtractDecimalScalar(col 25, val -26.28)(children: DecimalScalarSubtractDecimalColumn(val -863.257, col 16)(children: CastLongToDecimal(col 2) -> 16:decimal(10,0)) -> 25:decimal(14,3)) -> 26:decimal(15,3), DoubleColUnaryMinus(col 4) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -89010.0) -> 23:double, DoubleColDivideDoubleScalar(col 27, val 988888.0)(children: CastLongToDouble(col 0) -> 27:double) -> 28:double, LongColUnaryMinus(col 0) -> 19:long, DecimalScalarDivideDecimalColumn(val 79.553, col 29)(children: CastLongToDecimal(col 0) -> 29:decimal(3,0)) -> 30:decimal(9,7) Statistics: Num rows: 9898 Data size: 5632662 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: timestamp), _col3 (type: string), _col4 (type: boolean), _col5 (type: tinyint), _col6 (type: float), _col7 (type: timestamp), 
_col8 (type: smallint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: int), _col12 (type: decimal(14,3)), _col13 (type: smallint), _col14 (type: smallint), _col15 (type: smallint), _col16 (type: double), _col17 (type: decimal(15,3)), _col18 (type: float), _col19 (type: double), _col20 (type: double), _col21 (type: tinyint), _col22 (type: decimal(9,7)) sort order: +++++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 9898 Data size: 5632662 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: boolean), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: float), KEY.reducesinkkey7 (type: timestamp), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: bigint), KEY.reducesinkkey11 (type: int), KEY.reducesinkkey12 (type: decimal(14,3)), KEY.reducesinkkey13 (type: smallint), KEY.reducesinkkey14 (type: smallint), KEY.reducesinkkey15 (type: smallint), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: decimal(15,3)), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: tinyint), KEY.reducesinkkey22 (type: decimal(9,7)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Statistics: Num rows: 9898 Data size: 5632662 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 50 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 50 Data size: 28540 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 28540 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1009,7 +1205,8 @@ NULL -7196.0 1969-12-31 15:59:58.174 NULL false -64 -64.0 1969-12-31 15:59:56.04 NULL -7196.0 1969-12-31 15:59:58.174 
NULL false -64 -64.0 1969-12-31 16:00:01.785 -7196 -1639157869 6110780535632 NULL NULL 7196 -14392 -7196 NULL NULL 64.0 6.4051596E8 -6.471915929812072E-5 64 -1.2430156 NULL -7196.0 1969-12-31 15:59:58.174 NULL false -64 -64.0 1969-12-31 16:00:11.912 -7196 -1615920595 6024151978160 NULL NULL 7196 -14392 -7196 NULL NULL 64.0 6.4051596E8 -6.471915929812072E-5 64 -1.2430156 NULL -7196.0 1969-12-31 15:59:58.174 NULL false -64 -64.0 1969-12-31 16:00:12.339 -7196 1805860756 -6732248898368 NULL NULL 7196 -14392 -7196 NULL NULL 64.0 6.4051596E8 -6.471915929812072E-5 64 -1.2430156 -PREHOOK: query: EXPLAIN SELECT cint, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cbigint, cstring1, cboolean1, @@ -1044,7 +1241,8 @@ WHERE (((197 > ctinyint) ORDER BY cint, cbigint, cstring1, cboolean1, cfloat, cdouble, ctimestamp2, csmallint, cstring2, cboolean2, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15 LIMIT 25 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cbigint, cstring1, cboolean1, @@ -1079,6 +1277,10 @@ WHERE (((197 > ctinyint) ORDER BY cint, cbigint, cstring1, cboolean1, cfloat, cdouble, ctimestamp2, csmallint, cstring2, cboolean2, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1096,32 +1298,74 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongScalarGreaterLongColumn(val 197, col 0)(children: col 0) -> boolean, FilterLongColEqualLongColumn(col 2, col 3)(children: col 2) -> boolean) -> boolean, FilterLongColEqualLongScalar(col 3, val 359) -> boolean, FilterLongColLessLongScalar(col 10, val 0) -> boolean, FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6, pattern %ss) -> boolean, FilterDoubleColLessEqualDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 0) -> 12:double) -> boolean) -> boolean) -> boolean predicate: (((197 > UDFToInteger(ctinyint)) and (UDFToLong(cint) = cbigint)) or (cbigint = 359) or (cboolean1 < 0) or ((cstring1 like '%ss') and (cfloat <= UDFToFloat(ctinyint)))) (type: boolean) Statistics: Num rows: 8195 Data size: 1735170 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean), (UDFToDouble(cint) / UDFToDouble(cbigint)) (type: double), (CAST( cbigint AS decimal(19,0)) % 79.553) (type: decimal(5,3)), (- (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (10.175 % cfloat) (type: float), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: float), ((cfloat - (- cfloat)) % -6432.0) (type: float), (cdouble * UDFToDouble(csmallint)) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), (UDFToDouble(cfloat) - (UDFToDouble(cint) / UDFToDouble(cbigint))) 
(type: double), (- csmallint) (type: smallint), (3569 % cbigint) (type: bigint), (359.0 - cdouble) (type: double), (- csmallint) (type: smallint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 6, 10, 4, 5, 9, 1, 7, 11, 14, 16, 12, 13, 17, 19, 18, 21, 20, 22, 23, 26, 27, 24, 28] + selectExpressions: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 14:double, DecimalColModuloDecimalScalar(col 15, val 79.553)(children: CastLongToDecimal(col 3) -> 15:decimal(19,0)) -> 16:decimal(5,3), DoubleColUnaryMinus(col 17)(children: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 17:double) -> 12:double, DoubleScalarModuloDoubleColumn(val 10.175000190734863, col 4) -> 13:double, DoubleColUnaryMinus(col 4) -> 17:double, DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 19:double, DoubleColModuloDoubleScalar(col 20, val -6432.0)(children: DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 20:double) -> 18:double, DoubleColMultiplyDoubleColumn(col 5, col 20)(children: CastLongToDouble(col 1) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5) -> 20:double, LongColUnaryMinus(col 3) -> 22:long, DoubleColSubtractDoubleColumn(col 4, col 25)(children: col 4, DoubleColDivideDoubleColumn(col 23, col 24)(children: CastLongToDouble(col 2) -> 23:double, CastLongToDouble(col 3) -> 24:double) -> 25:double) -> 23:double, LongColUnaryMinus(col 1) -> 26:long, LongScalarModuloLongColumn(val 3569, col 3) -> 27:long, DoubleScalarSubtractDoubleColumn(val 359.0, col 5) -> 24:double, LongColUnaryMinus(col 1) -> 28:long Statistics: Num rows: 8195 Data size: 3349694 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean), _col4 (type: float), _col5 (type: double), _col6 (type: timestamp), _col7 (type: smallint), _col8 (type: string), _col9 (type: boolean), _col10 (type: double), _col11 (type: decimal(5,3)), _col12 (type: double), _col13 (type: float), _col14 (type: float), _col15 (type: float), _col16 (type: float), _col17 (type: double), _col18 (type: double), _col19 (type: bigint), _col20 (type: double), _col21 (type: smallint), _col22 (type: bigint), _col23 (type: double), _col24 (type: smallint) sort order: +++++++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 8195 Data size: 3349694 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: boolean), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: timestamp), KEY.reducesinkkey7 (type: smallint), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: boolean), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: decimal(5,3)), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey13 (type: float), KEY.reducesinkkey14 (type: float), KEY.reducesinkkey15 (type: float), KEY.reducesinkkey16 (type: float), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: bigint), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: smallint), KEY.reducesinkkey22 (type: bigint), KEY.reducesinkkey23 (type: double), KEY.reducesinkkey21 (type: smallint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 21] Statistics: Num rows: 8195 Data size: 3349694 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 10520 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 25 Data size: 10520 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1211,7 +1455,8 @@ POSTHOOK: Input: default@alltypesorc -462839731 988888 ss false -51.0 NULL NULL NULL Lml5J2QBU77 false -468.04059812638036 44.210 468.04059812638036 10.175 51.0 -102.0 -102.0 NULL NULL -988888 417.04059812638036 NULL 3569 NULL NULL -635141101 -89010 ss false -51.0 NULL NULL NULL rVWAj4N1MCg8Scyp7wj2C true 7135.6151106617235 -69.746 -7135.6151106617235 10.175 51.0 -102.0 -102.0 NULL NULL 89010 -7186.6151106617235 NULL 3569 NULL NULL WARNING: Comparing a bigint and a double may result in a loss of precision. 
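
The WARNING above ("Comparing a bigint and a double may result in a loss of precision.") is worth unpacking: a Java double carries a 53-bit significand, so bigint values beyond 2^53 are rounded when widened for the comparison. A minimal, self-contained demonstration in plain JDK code (not Hive internals; the class name is made up for illustration):

    public final class BigintDoubleCompare {
        public static void main(String[] args) {
            long a = (1L << 53) + 1;          // 9007199254740993, not exactly representable as double
            double d = (double) a;            // rounds to 9007199254740992.0
            System.out.println(a == (long) d);                  // false: the round trip changed the value
            System.out.println((double) a == (double) (a - 1)); // true: two distinct bigints collide as doubles
        }
    }

In the plans above, predicates such as (cdouble <= UDFToDouble(cbigint)) widen the bigint operand to double in exactly this way, which is what the planner is flagging.
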
-PREHOOK: query: EXPLAIN SELECT cint, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cstring1, cboolean2, ctimestamp2, @@ -1245,7 +1490,8 @@ WHERE (((csmallint > -26.28) ORDER BY cboolean1, cstring1, ctimestamp2, cfloat, cbigint, cstring1, cdouble, cint, csmallint, cdouble, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13 LIMIT 75 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cstring1, cboolean2, ctimestamp2, @@ -1279,6 +1525,10 @@ WHERE (((csmallint > -26.28) ORDER BY cboolean1, cstring1, ctimestamp2, cfloat, cbigint, cstring1, cdouble, cint, csmallint, cdouble, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13 LIMIT 75 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1296,33 +1546,75 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDecimalColGreaterDecimalScalar(col 12, val -26.28)(children: CastLongToDecimal(col 1) -> 12:decimal(7,2)) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterStringGroupColGreaterEqualStringScalar(col 6, val ss) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 13, col 5)(children: CastLongToDouble(col 2) -> 13:double) -> boolean) -> boolean, FilterLongColEqualLongScalar(col 0, val -89010)(children: col 0) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13, col 4)(children: CastLongToFloatViaLongToDouble(col 3) -> 13:double) -> boolean, FilterDecimalScalarLessEqualDecimalColumn(val -26.28, col 12)(children: CastLongToDecimal(col 1) -> 12:decimal(7,2)) -> boolean) -> boolean) -> boolean predicate: (((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss')) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((UDFToFloat(cbigint) <= cfloat) and (-26.28 <= CAST( csmallint AS decimal(7,2))))) (type: boolean) Statistics: Num rows: 10922 Data size: 2312410 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cstring1 (type: string), cboolean2 (type: boolean), ctimestamp2 (type: timestamp), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), cboolean1 (type: boolean), (cint + UDFToInteger(csmallint)) (type: int), (cbigint - UDFToLong(ctinyint)) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - UDFToLong(ctinyint)) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), (UDFToLong((cint + UDFToInteger(csmallint))) * (- cbigint)) (type: bigint), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (-1.389 / CAST( ctinyint AS decimal(3,0))) (type: decimal(8,7)), (UDFToDouble(cbigint) % cdouble) (type: double), (- csmallint) (type: smallint), (UDFToInteger(csmallint) + (cint + 
UDFToInteger(csmallint))) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 6, 11, 9, 5, 4, 3, 1, 10, 14, 15, 16, 13, 18, 19, 20, 22, 25, 27, 24, 17, 28] + selectExpressions: LongColAddLongColumn(col 2, col 1)(children: col 1) -> 14:long, LongColSubtractLongColumn(col 3, col 0)(children: col 0) -> 15:long, LongColUnaryMinus(col 3) -> 16:long, DoubleColUnaryMinus(col 4) -> 13:double, LongColAddLongColumn(col 17, col 3)(children: LongColSubtractLongColumn(col 3, col 0)(children: col 0) -> 17:long) -> 18:long, DoubleColDivideDoubleColumn(col 5, col 5) -> 19:double, DoubleColUnaryMinus(col 5) -> 20:double, LongColMultiplyLongColumn(col 17, col 21)(children: col 17, LongColUnaryMinus(col 3) -> 21:long) -> 22:long, DoubleColAddDoubleColumn(col 23, col 24)(children: DoubleColUnaryMinus(col 5) -> 23:double, CastLongToDouble(col 3) -> 24:double) -> 25:double, DecimalScalarDivideDecimalColumn(val -1.389, col 26)(children: CastLongToDecimal(col 0) -> 26:decimal(3,0)) -> 27:decimal(8,7), DoubleColModuloDoubleColumn(col 23, col 5)(children: CastLongToDouble(col 3) -> 23:double) -> 24:double, LongColUnaryMinus(col 1) -> 17:long, LongColAddLongColumn(col 1, col 21)(children: col 1, LongColAddLongColumn(col 2, col 1)(children: col 1) -> 21:long) -> 28:long Statistics: Num rows: 10922 Data size: 3594034 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col8 (type: boolean), _col1 (type: string), _col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col1 (type: string), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col4 (type: double), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: decimal(8,7)), _col19 (type: double), _col20 (type: smallint), _col21 (type: int) sort order: +++++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 10922 Data size: 3594034 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey7 (type: int), KEY.reducesinkkey1 (type: string), 
VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: bigint), KEY.reducesinkkey13 (type: float), KEY.reducesinkkey14 (type: bigint), KEY.reducesinkkey15 (type: double), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: bigint), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: decimal(8,7)), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: smallint), KEY.reducesinkkey22 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [7, 1, 23, 2, 6, 3, 4, 8, 0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Statistics: Num rows: 10922 Data size: 3594034 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 75 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 75 Data size: 24810 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 75 Data size: 24810 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1483,7 +1775,8 @@ NULL NULL true 1969-12-31 15:59:58.456 15601.0 -62.0 667693308 15601 NULL NULL 6 NULL NULL true 1969-12-31 15:59:58.456 15601.0 -63.0 -200542601 15601 NULL NULL -200542538 200542601 63.0 -401085139 1.0 -15601.0 NULL -2.00558202E8 0.0220476 -7347.0 -15601 NULL NULL NULL true 1969-12-31 15:59:58.456 15601.0 -63.0 -721244708 15601 NULL NULL -721244645 721244708 63.0 -1442489353 1.0 -15601.0 NULL -7.21260309E8 0.0220476 -10478.0 -15601 NULL NULL NULL true 1969-12-31 15:59:58.456 15601.0 -64.0 -1809291815 15601 NULL NULL -1809291751 1809291815 64.0 -3618583566 1.0 -15601.0 NULL -1.809307416E9 0.0217031 -12643.0 -15601 NULL -PREHOOK: query: EXPLAIN SELECT ctimestamp1, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT ctimestamp1, cstring2, cdouble, cfloat, @@ -1510,7 +1803,8 @@ WHERE (((-1.389 >= cint) ORDER BY csmallint, cstring2, cdouble, cfloat, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 LIMIT 45 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT ctimestamp1, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT ctimestamp1, cstring2, cdouble, cfloat, @@ -1537,6 +1831,10 @@ WHERE (((-1.389 >= cint) ORDER BY csmallint, cstring2, cdouble, cfloat, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 LIMIT 45 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1554,33 +1852,75 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2528254 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: 
FilterDecimalScalarGreaterEqualDecimalColumn(val -1.389, col 12)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean, FilterLongColLessLongColumn(col 1, col 0)(children: col 0) -> boolean, FilterLongScalarGreaterLongColumn(val -6432, col 1)(children: col 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 5, col 4)(children: col 4) -> boolean, FilterStringGroupColLessEqualStringScalar(col 7, val a) -> boolean) -> boolean, FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6, pattern ss%) -> boolean, FilterDecimalScalarGreaterDecimalColumn(val 10.175, col 13)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean) -> boolean) -> boolean predicate: (((-1.389 >= CAST( cint AS decimal(13,3))) and (csmallint < UDFToShort(ctinyint)) and (-6432 > UDFToInteger(csmallint))) or ((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (10.175 > CAST( cbigint AS decimal(22,3))))) (type: boolean) Statistics: Num rows: 3868 Data size: 795962 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), cstring2 (type: string), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), (UDFToDouble(cbigint) / 3569.0) (type: double), (-257 - UDFToInteger(csmallint)) (type: int), (-6432.0 * cfloat) (type: float), (- cdouble) (type: double), (cdouble * 10.175) (type: double), (UDFToDouble((-6432.0 * cfloat)) / UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), (cint % UDFToInteger(csmallint)) (type: int), (- cdouble) (type: double), (cdouble * (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 7, 5, 4, 3, 1, 15, 16, 14, 17, 18, 20, 19, 21, 22, 24] + selectExpressions: DoubleColDivideDoubleScalar(col 14, val 3569.0)(children: CastLongToDouble(col 3) -> 14:double) -> 15:double, LongScalarSubtractLongColumn(val -257, col 1)(children: col 1) -> 16:long, DoubleScalarMultiplyDoubleColumn(val -6432.0, col 4) -> 14:double, DoubleColUnaryMinus(col 5) -> 17:double, DoubleColMultiplyDoubleScalar(col 5, val 10.175) -> 18:double, DoubleColDivideDoubleColumn(col 19, col 4)(children: col 19, col 4) -> 20:double, DoubleColUnaryMinus(col 4) -> 19:double, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 21:long, DoubleColUnaryMinus(col 5) -> 22:double, DoubleColMultiplyDoubleColumn(col 5, col 23)(children: DoubleColUnaryMinus(col 5) -> 23:double) -> 24:double Statistics: Num rows: 3868 Data size: 748844 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col5 (type: smallint), _col1 (type: string), _col2 (type: double), _col3 (type: float), _col4 (type: bigint), _col6 (type: double), _col7 (type: int), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: int), _col14 (type: double), _col15 (type: double) sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS 
true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 3868 Data size: 748844 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: timestamp) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: timestamp), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: float), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey14 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [15, 1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 10, 11, 12, 8, 14] Statistics: Num rows: 3868 Data size: 748844 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 45 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 45 Data size: 8880 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 45 Data size: 8880 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1696,7 +2036,8 @@ POSTHOOK: Input: default@alltypesorc NULL 4hA4KQj2vD3fI6gX82220d 12329.0 NULL -1887561756 12329 -528876.9279910339 -12586 NULL -12329.0 125447.57500000001 NULL NULL -3104 -12329.0 -1.52004241E8 NULL 4hA4KQj2vD3fI6gX82220d 477.0 NULL -1887561756 477 -528876.9279910339 -734 NULL -477.0 4853.475 NULL NULL -326 -477.0 -227529.0 NULL xH7445Rals48VOulSyR5F 10221.0 NULL -1645852809 10221 -461152.37013168953 -10478 NULL -10221.0 103998.675 NULL NULL 5022 -10221.0 -1.04468841E8 -PREHOOK: query: EXPLAIN SELECT csmallint, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT csmallint, (csmallint % -75) as c1, STDDEV_SAMP(csmallint) as c2, (-1.389 / csmallint) as c3, @@ -1716,7 +2057,8 @@ GROUP BY csmallint ORDER BY csmallint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT csmallint, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT csmallint, (csmallint % -75) as c1, STDDEV_SAMP(csmallint) as c2, (-1.389 / csmallint) as c3, @@ -1736,6 +2078,10 @@ GROUP BY csmallint ORDER BY csmallint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS 
true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1754,15 +2100,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 256884 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 1, val -257)(children: col 1) -> boolean, FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val -6432, col 1)(children: col 1) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 2) -> 12:double) -> boolean, FilterLongColLessEqualLongColumn(col 0, col 2)(children: col 0) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((UDFToInteger(csmallint) >= -257) and ((-6432 = UDFToInteger(csmallint)) or ((UDFToDouble(cint) >= cdouble) and (UDFToInteger(ctinyint) <= cint)))) (type: boolean) Statistics: Num rows: 2503 Data size: 52344 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: csmallint (type: smallint), cbigint (type: bigint), ctinyint (type: tinyint) outputColumnNames: csmallint, cbigint, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 3, 0] Statistics: Num rows: 2503 Data size: 52344 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: stddev_samp(csmallint), sum(cbigint), var_pop(ctinyint), count() + Group By Vectorization: + aggregators: VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFVarPopLong(col 0) -> struct, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 1 + native: false + projectedOutputColumns: [0, 1, 2, 3] + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: csmallint (type: smallint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -1775,8 +2140,21 @@ STAGE PLANS: value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: struct), _col4 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3) @@ -1795,16 +2173,33 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] 
IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: decimal(10,9)), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: int), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 1128 Data size: 197388 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 3504 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 3504 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1882,7 +2277,8 @@ POSTHOOK: Input: default@alltypesorc -89 -14 0.0 0.015606742 NULL NULL 14 0.0 -14 1 89011 -95 -20 0.0 0.014621053 NULL NULL 20 0.0 -20 1 89011 WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: EXPLAIN SELECT cdouble, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cdouble, VAR_SAMP(cdouble), (2563.58 * VAR_SAMP(cdouble)), (-(VAR_SAMP(cdouble))), @@ -1909,7 +2305,8 @@ WHERE (((cdouble > 2563.58)) GROUP BY cdouble ORDER BY cdouble PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdouble, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cdouble, VAR_SAMP(cdouble), (2563.58 * VAR_SAMP(cdouble)), (-(VAR_SAMP(cdouble))), @@ -1936,6 +2333,10 @@ WHERE (((cdouble > 2563.58)) GROUP BY cdouble ORDER BY cdouble POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1954,15 +2355,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 293580 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 2563.58) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 3, col 2)(children: col 2) -> boolean, FilterLongColLessLongColumn(col 1, col 2)(children: col 1) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -5638.14990234375) -> boolean) -> boolean, FilterDecimalScalarEqualDecimalColumn(val 2563.58, col 12)(children: CastLongToDecimal(col 0) -> 12:decimal(6,2)) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterDecimalScalarGreaterDecimalColumn(val -5638.15, col 14)(children: CastLongToDecimal(col 3) -> 14:decimal(21,2)) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((cdouble > 2563.58) and 
(((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or (2563.58 = CAST( ctinyint AS decimal(6,2))) or ((cdouble <= UDFToDouble(cbigint)) and (-5638.15 > CAST( cbigint AS decimal(21,2)))))) (type: boolean) Statistics: Num rows: 2503 Data size: 59820 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), cfloat (type: float) outputColumnNames: cdouble, cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 4] Statistics: Num rows: 2503 Data size: 59820 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: var_samp(cdouble), count(cfloat), sum(cfloat), var_pop(cdouble), stddev_pop(cdouble), sum(cdouble) + Group By Vectorization: + aggregators: VectorUDAFVarSampDouble(col 5) -> struct, VectorUDAFCount(col 4) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFVarPopDouble(col 5) -> struct, VectorUDAFStdPopDouble(col 5) -> struct, VectorUDAFSumDouble(col 5) -> double + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 5 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarSampDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false keys: cdouble (type: double) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -1975,8 +2395,21 @@ STAGE PLANS: value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: double), _col4 (type: struct), _col5 (type: struct), _col6 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF var_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: var_samp(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), var_pop(VALUE._col3), stddev_pop(VALUE._col4), sum(VALUE._col5) @@ -1995,13 +2428,27 @@ STAGE PLANS: value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), 
VALUE._col3 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col12 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13] Statistics: Num rows: 870 Data size: 109608 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 870 Data size: 109608 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2073,7 +2520,8 @@ ORDER BY cdouble POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -PREHOOK: query: EXPLAIN SELECT ctimestamp1, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT ctimestamp1, cstring1, STDDEV_POP(cint) as c1, (STDDEV_POP(cint) * 10.175) as c2, @@ -2128,7 +2576,8 @@ GROUP BY ctimestamp1, cstring1 ORDER BY ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30, c31, c32, c33, c34, c35, c36, c37 LIMIT 50 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT ctimestamp1, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT ctimestamp1, cstring1, STDDEV_POP(cint) as c1, (STDDEV_POP(cint) * 10.175) as c2, @@ -2183,6 +2632,10 @@ GROUP BY ctimestamp1, cstring1 ORDER BY ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30, c31, c32, c33, c34, c35, c36, c37 LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2201,15 +2654,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDoubleColNotEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val -257, col 0)(children: col 0) -> boolean, SelectColumnIsNotNull(col 11) -> boolean, FilterStringColRegExpStringScalar(col 6, pattern .*ss) -> boolean, FilterDoubleScalarLessDoubleColumn(val -3.0, col 12)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean) -> boolean, FilterDoubleColEqualDoubleScalar(col 12, val -5.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean) -> boolean, FilterDoubleColEqualDoubleColumn(col 5, col 
12)(children: CastLongToDouble(col 2) -> 12:double) -> boolean, FilterExprAndExpr(children: SelectColumnIsNull(col 10) -> boolean, FilterDoubleColLessDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 2) -> 12:double) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((UDFToDouble(ctimestamp1) <> 0.0) and (((-257 <> UDFToInteger(ctinyint)) and cboolean2 is not null and cstring1 regexp '.*ss' and (-3.0 < UDFToDouble(ctimestamp1))) or (UDFToDouble(ctimestamp2) = -5.0) or ((UDFToDouble(ctimestamp1) < 0.0) and (cstring2 like '%b%')) or (cdouble = UDFToDouble(cint)) or (cboolean1 is null and (cfloat < UDFToFloat(cint))))) (type: boolean) Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), cstring1 (type: string), cint (type: int), csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cdouble (type: double) outputColumnNames: ctimestamp1, cstring1, cint, csmallint, ctinyint, cfloat, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 6, 2, 1, 0, 4, 5] Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: stddev_pop(cint), avg(csmallint), count(), min(ctinyint), var_samp(csmallint), var_pop(cfloat), avg(cint), var_samp(cfloat), avg(cfloat), min(cdouble), var_pop(csmallint), stddev_pop(ctinyint), sum(cint) + Group By Vectorization: + aggregators: VectorUDAFStdPopLong(col 2) -> struct, VectorUDAFAvgLong(col 1) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFVarSampLong(col 1) -> struct, VectorUDAFVarPopDouble(col 4) -> struct, VectorUDAFAvgLong(col 2) -> struct, VectorUDAFVarSampDouble(col 4) -> struct, VectorUDAFAvgDouble(col 4) -> struct, VectorUDAFMinDouble(col 5) -> double, VectorUDAFVarPopLong(col 1) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFSumLong(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 8, col 6 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: ctimestamp1 (type: timestamp), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -2222,8 +2694,21 @@ STAGE PLANS: value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: bigint), _col5 (type: tinyint), _col6 (type: struct), _col7 (type: struct), 
_col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: double), _col12 (type: struct), _col13 (type: struct), _col14 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: stddev_pop(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), min(VALUE._col3), var_samp(VALUE._col4), var_pop(VALUE._col5), avg(VALUE._col6), var_samp(VALUE._col7), avg(VALUE._col8), min(VALUE._col9), var_pop(VALUE._col10), stddev_pop(VALUE._col11), sum(VALUE._col12) @@ -2242,16 +2727,33 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: double), KEY.reducesinkkey4 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey4 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: tinyint), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey13 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: double), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: double), KEY.reducesinkkey22 (type: double), KEY.reducesinkkey23 (type: double), KEY.reducesinkkey24 (type: double), KEY.reducesinkkey25 (type: double), KEY.reducesinkkey26 (type: double), KEY.reducesinkkey27 (type: tinyint), KEY.reducesinkkey28 (type: double), KEY.reducesinkkey29 (type: double), KEY.reducesinkkey30 (type: double), KEY.reducesinkkey31 (type: double), KEY.reducesinkkey32 (type: decimal(8,6)), KEY.reducesinkkey33 (type: double), KEY.reducesinkkey34 (type: bigint), KEY.reducesinkkey35 (type: double), KEY.reducesinkkey36 (type: bigint), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey38 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 
35, 36, 8, 38] Statistics: Num rows: 3072 Data size: 1542740 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 50 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 50 Data size: 25172 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 25172 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2428,7 +2930,8 @@ POSTHOOK: Input: default@alltypesorc 1969-12-31 15:59:46.82 NULL NULL NULL NULL 15601.0 NULL NULL 1 -1 NULL -46 NULL NULL 0.0 NULL NULL NULL 0.0 6.522017819370554E-4 NULL 0.0 NULL NULL -46.0 NULL 6.522017819364598E-4 46 15601.0 0.0 NULL NULL -0.571304 0.0 NULL NULL 1 1 NULL 1969-12-31 15:59:46.847 NULL NULL NULL NULL -7196.0 NULL NULL 1 -1 NULL -26 NULL NULL 0.0 NULL NULL NULL 0.0 -0.0014139799888827128 NULL 0.0 NULL NULL -26.0 NULL 0.001413979988882123 26 -7196.0 0.0 NULL NULL -1.010769 0.0 NULL NULL 1 1 NULL 1969-12-31 15:59:46.915 NULL NULL NULL NULL -200.0 NULL NULL 1 -1 NULL -25 NULL NULL 0.0 NULL NULL NULL 0.0 -0.050875000000000004 NULL 0.0 NULL NULL -25.0 NULL 0.0 25 -200.0 0.0 NULL NULL -1.051200 0.0 NULL NULL 1 1 NULL -PREHOOK: query: EXPLAIN SELECT cboolean1, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cboolean1, MAX(cfloat), (-(MAX(cfloat))), (-26.28 / MAX(cfloat)), @@ -2468,7 +2971,8 @@ WHERE (((cboolean1 IS NOT NULL)) GROUP BY cboolean1 ORDER BY cboolean1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cboolean1, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cboolean1, MAX(cfloat), (-(MAX(cfloat))), (-26.28 / MAX(cfloat)), @@ -2508,6 +3012,10 @@ WHERE (((cboolean1 IS NOT NULL)) GROUP BY cboolean1 ORDER BY cboolean1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2526,15 +3034,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 1) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 11, col 10) -> boolean, FilterDecimalColLessEqualDecimalScalar(col 13, val -863.257)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2, val -257) -> boolean, SelectColumnIsNotNull(col 6) -> boolean, FilterLongColGreaterEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterStringColRegExpStringScalar(col 7, pattern b) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 1, col 0)(children: col 0) -> boolean, SelectColumnIsNull(col 9) -> boolean) -> boolean) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean predicate: ((((cdouble < UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (CAST( cbigint AS decimal(22,3)) <= -863.257)) or ((cint >= -257) and cstring1 is not null and (cboolean1 >= 1)) or cstring2 regexp 'b' or 
((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null)) and cboolean1 is not null) (type: boolean) Statistics: Num rows: 7845 Data size: 1661020 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), cfloat (type: float), cbigint (type: bigint), cint (type: int), cdouble (type: double), ctinyint (type: tinyint), csmallint (type: smallint) outputColumnNames: cboolean1, cfloat, cbigint, cint, cdouble, ctinyint, csmallint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 4, 3, 2, 5, 0, 1] Statistics: Num rows: 7845 Data size: 1661020 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(cfloat), sum(cbigint), var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint) + Group By Vectorization: + aggregators: VectorUDAFMaxDouble(col 4) -> float, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFAvgDouble(col 5) -> struct, VectorUDAFMinLong(col 3) -> bigint, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFSumLong(col 2) -> bigint, VectorUDAFStdSampLong(col 0) -> struct, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFAvgLong(col 2) -> struct + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 10 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false keys: cboolean1 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -2547,8 +3074,21 @@ STAGE PLANS: value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: struct), _col4 (type: struct), _col5 (type: bigint), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF var_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), var_samp(VALUE._col2), avg(VALUE._col3), min(VALUE._col4), var_pop(VALUE._col5), sum(VALUE._col6), stddev_samp(VALUE._col7), stddev_pop(VALUE._col8), avg(VALUE._col9) @@ -2567,13 +3107,27 @@ STAGE PLANS: value expressions: _col1 (type: float), _col2 (type: 
float), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(23,3)), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: float), _col14 (type: double), _col15 (type: double), _col17 (type: bigint), _col18 (type: double), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,3)), _col21 (type: double), _col22 (type: decimal(25,3)), _col23 (type: double), _col24 (type: double), _col25 (type: double) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: decimal(23,3)), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: float), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col12 (type: float), VALUE._col15 (type: bigint), VALUE._col16 (type: double), VALUE._col17 (type: decimal(24,3)), VALUE._col18 (type: decimal(25,3)), VALUE._col19 (type: double), VALUE._col20 (type: decimal(25,3)), VALUE._col21 (type: double), VALUE._col22 (type: double), VALUE._col23 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 13, 16, 17, 18, 19, 20, 21, 22, 23, 24] Statistics: Num rows: 3 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2680,12 +3234,16 @@ POSTHOOK: query: create table test_count(i int) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_count -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(*) from test_count PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(*) from test_count POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2703,29 +3261,71 @@ STAGE PLANS: TableScan alias: test_count Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: 
VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2747,12 +3347,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_count #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(i) from test_count PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(i) from test_count POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2770,31 +3374,73 @@ STAGE PLANS: TableScan alias: test_count Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: i (type: int) outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count(i) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + 
className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2890,12 +3536,16 @@ POSTHOOK: Lineage: alltypesnullorc.cstring2 SIMPLE [(alltypesnull)alltypesnull.F POSTHOOK: Lineage: alltypesnullorc.ctimestamp1 SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesnullorc.ctimestamp2 SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesnullorc.ctinyint SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(*) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(*) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2913,29 +3563,71 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2957,12 +3649,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 12288 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(ctinyint) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(ctinyint) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2980,31 +3676,73 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3026,12 +3764,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(cint) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(cint) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3049,31 +3791,73 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cint (type: int) outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cint) + Group By Vectorization: + aggregators: VectorUDAFCount(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3095,12 +3879,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(cfloat) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(cfloat) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3118,31 +3906,73 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cfloat) + Group By Vectorization: + aggregators: VectorUDAFCount(col 4) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP 
IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3164,12 +3994,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(cstring1) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(cstring1) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3187,31 +4021,73 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cstring1 (type: string) outputColumnNames: cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [6] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cstring1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 6) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + 
allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3233,12 +4109,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(cboolean1) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(cboolean1) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3256,31 +4136,73 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cboolean1 (type: boolean) outputColumnNames: cboolean1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cboolean1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 10) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out index d0efe00..d06ae66 100644 --- ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out @@ -94,12 +94,16 @@ POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@vsmb_bucket_txt POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -117,33 +121,71 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 
2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -187,12 +229,16 @@ POSTHOOK: Input: default@vsmb_bucket_2 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -210,17 +256,36 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -237,6 +302,10 @@ STAGE PLANS: value expressions: value (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.RCFileInputFormat Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -280,12 +349,16 @@ POSTHOOK: Input: default@vsmb_bucket_rc 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -303,17 +376,36 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -330,6 +422,10 @@ STAGE PLANS: value expressions: value (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: llap Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/llap/vectorized_case.q.out ql/src/test/results/clientpositive/llap/vectorized_case.q.out index 6e13369..b58e707 100644 --- ql/src/test/results/clientpositive/llap/vectorized_case.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_case.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select csmallint, case @@ 
-16,7 +16,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select csmallint, case @@ -34,6 +34,10 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -48,15 +52,30 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 14, 15] + selectExpressions: VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:string, VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 15:string Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -64,6 +83,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -113,7 +140,7 @@ POSTHOOK: Input: default@alltypesorc 10583 c c 418 a a 12205 b b -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select csmallint, case @@ -131,7 +158,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select csmallint, case @@ -149,6 +176,10 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -163,15 +194,30 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 14, 15] + selectExpressions: VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:string, VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 15:string Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -179,6 +225,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vectorized_casts.q.out ql/src/test/results/clientpositive/llap/vectorized_casts.q.out index c377ac8..db8df66 100644 --- ql/src/test/results/clientpositive/llap/vectorized_casts.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_casts.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization select cast (ctinyint as boolean) @@ -72,7 +72,7 @@ from alltypesorc where cbigint % 250 = 0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select cast (ctinyint as boolean) @@ -146,6 +146,10 @@ from alltypesorc where cbigint % 250 = 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -176,6 +180,14 @@ STAGE PLANS: serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vectorized_context.q.out ql/src/test/results/clientpositive/llap/vectorized_context.q.out index 1f70a01..855a50f 100644 --- ql/src/test/results/clientpositive/llap/vectorized_context.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_context.q.out @@ -82,20 +82,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@household_demographics POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization select store.s_city, ss_net_profit from store_sales JOIN store ON store_sales.ss_store_sk = store.s_store_sk JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select store.s_city, ss_net_profit from store_sales JOIN store ON store_sales.ss_store_sk = store.s_store_sk JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -128,6 +132,14 @@ STAGE PLANS: value expressions: _col1 (type: int), _col2 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -176,6 +188,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -195,6 +215,14 @@ STAGE PLANS: Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out index 40f5b73..892a6de 100644 --- ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out @@ -197,7 +197,7 @@ date_udf_flight_orc.fl_date date_udf_flight_orc.fl_time 
2010-10-31 2010-10-31 07:00:00 2010-10-31 2010-10-31 07:00:00 2010-10-31 2010-10-31 07:00:00 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_time, to_unix_timestamp(fl_time), year(fl_time), @@ -220,7 +220,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(fl_time, timestamp "2007-03-14 08:21:59") FROM date_udf_flight_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_time, to_unix_timestamp(fl_time), year(fl_time), @@ -244,20 +244,49 @@ POSTHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: date_udf_flight_orc + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: fl_time (type: timestamp), to_unix_timestamp(fl_time) (type: bigint), year(fl_time) (type: int), month(fl_time) (type: int), day(fl_time) (type: int), dayofmonth(fl_time) (type: int), dayofweek(fl_time) (type: int), weekofyear(fl_time) (type: int), CAST( fl_time AS DATE) (type: date), to_date(fl_time) (type: date), date_add(fl_time, 2) (type: date), date_sub(fl_time, 2) (type: date), datediff(fl_time, '2000-01-01') (type: int), datediff(fl_time, 2000-01-01) (type: int), datediff(fl_time, 2000-01-01 00:00:00.0) (type: int), datediff(fl_time, 2000-01-01 11:13:09.0) (type: int), datediff(fl_time, '2007-03-14') (type: int), datediff(fl_time, 2007-03-14) (type: int), datediff(fl_time, 2007-03-14 00:00:00.0) (type: int), datediff(fl_time, 2007-03-14 08:21:59.0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: exception: java.lang.NullPointerException stack trace: java.lang.String.<init>(String.java:515), org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateDiffColScalar.vectorExpressionParameters(VectorUDFDateDiffColScalar.java:304), org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression.toString(VectorExpression.java:163), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpression(VectorizationContext.java:601), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateExprNodeDesc(Vectorizer.java:2183), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateExprNodeDesc(Vectorizer.java:2173), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateSelectOperator(Vectorizer.java:1893),
org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateMapWorkOperator(Vectorizer.java:1724), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$MapWorkValidationNodeProcessor.process(Vectorizer.java:1354), org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90), org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105), org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89), org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.walk(DefaultGraphWalker.java:158), org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:120), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateMapWork(Vectorizer.java:1082), ... + vectorized: false + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: date_udf_flight_orc - Select Operator - expressions: fl_time (type: timestamp), to_unix_timestamp(fl_time) (type: bigint), year(fl_time) (type: int), month(fl_time) (type: int), day(fl_time) (type: int), dayofmonth(fl_time) (type: int), dayofweek(fl_time) (type: int), weekofyear(fl_time) (type: int), CAST( fl_time AS DATE) (type: date), to_date(fl_time) (type: date), date_add(fl_time, 2) (type: date), date_sub(fl_time, 2) (type: date), datediff(fl_time, '2000-01-01') (type: int), datediff(fl_time, 2000-01-01) (type: int), datediff(fl_time, 2000-01-01 00:00:00.0) (type: int), datediff(fl_time, 2000-01-01 11:13:09.0) (type: int), datediff(fl_time, '2007-03-14') (type: int), datediff(fl_time, 2007-03-14) (type: int), datediff(fl_time, 2007-03-14 00:00:00.0) (type: int), datediff(fl_time, 2007-03-14 08:21:59.0) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 - ListSink + ListSink PREHOOK: query: SELECT fl_time, @@ -447,7 +476,7 @@ fl_time _c1 _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 _c10 _c11 _c12 _c13 _c14 _c15 _c16 _ 2010-10-31 07:00:00 1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 3956 3956 3956 1327 1327 1327 1327 2010-10-31 07:00:00 1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 3956 3956 3956 1327 1327 1327 1327 2010-10-31 07:00:00 1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 3956 3956 3956 1327 1327 1327 1327 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_date, to_unix_timestamp(fl_date), year(fl_date), @@ -470,7 +499,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(fl_date, timestamp "2007-03-14 08:21:59") FROM date_udf_flight_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_date, to_unix_timestamp(fl_date), year(fl_date), @@ -494,20 +523,49 @@ POSTHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: date_udf_flight_orc + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: fl_date (type: date), to_unix_timestamp(fl_date) (type: bigint), 
year(fl_date) (type: int), month(fl_date) (type: int), day(fl_date) (type: int), dayofmonth(fl_date) (type: int), dayofweek(fl_date) (type: int), weekofyear(fl_date) (type: int), fl_date (type: date), to_date(fl_date) (type: date), date_add(fl_date, 2) (type: date), date_sub(fl_date, 2) (type: date), datediff(fl_date, '2000-01-01') (type: int), datediff(fl_date, 2000-01-01) (type: int), datediff(fl_date, 2000-01-01 00:00:00.0) (type: int), datediff(fl_date, 2000-01-01 11:13:09.0) (type: int), datediff(fl_date, '2007-03-14') (type: int), datediff(fl_date, 2007-03-14) (type: int), datediff(fl_date, 2007-03-14 00:00:00.0) (type: int), datediff(fl_date, 2007-03-14 08:21:59.0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: exception: java.lang.NullPointerException stack trace: java.lang.String.<init>(String.java:515), org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateDiffColScalar.vectorExpressionParameters(VectorUDFDateDiffColScalar.java:304), org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression.toString(VectorExpression.java:163), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpression(VectorizationContext.java:601), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateExprNodeDesc(Vectorizer.java:2183), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateExprNodeDesc(Vectorizer.java:2173), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateSelectOperator(Vectorizer.java:1893), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateMapWorkOperator(Vectorizer.java:1724), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$MapWorkValidationNodeProcessor.process(Vectorizer.java:1354), org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90), org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105), org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89), org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.walk(DefaultGraphWalker.java:158), org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:120), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateMapWork(Vectorizer.java:1082), ...
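The notVectorizedReason above records a NullPointerException thrown while the Vectorizer builds the debug string for VectorUDFDateDiffColScalar during plan validation: vectorExpressionParameters() passes a null byte[] scalar operand to the String constructor (java.lang.String.<init>), so validation of the SELECT expression aborts and the whole map work falls back to non-vectorized execution. A minimal Java sketch of the null-guard pattern that avoids this class of crash, assuming a hypothetical expression class with a nullable byte[] scalar field (names are illustrative, not the actual Hive fix):

    // Illustrative sketch only: format a vector expression's parameters
    // without dereferencing a possibly-null scalar operand.
    public class DateDiffParamsSketch {
      private final int colNum;          // input column index
      private final byte[] stringValue;  // scalar operand; may be null when the
                                         // scalar arrived as a date/timestamp form

      public DateDiffParamsSketch(int colNum, byte[] stringValue) {
        this.colNum = colNum;
        this.stringValue = stringValue;
      }

      public String vectorExpressionParameters() {
        // new String((byte[]) null) throws NullPointerException in
        // java.lang.String.<init>, exactly as in the stack trace above.
        String scalar = (stringValue == null) ? "null" : new String(stringValue);
        return "col " + colNum + ", val " + scalar;
      }
    }

Guarding the formatting path matters here because, as the stack trace shows, the string is built inside Vectorizer.validateSelectOperator before execution, so a single null operand in a debug string disables vectorization for the entire map task.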
+ vectorized: false + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: date_udf_flight_orc - Select Operator - expressions: fl_date (type: date), to_unix_timestamp(fl_date) (type: bigint), year(fl_date) (type: int), month(fl_date) (type: int), day(fl_date) (type: int), dayofmonth(fl_date) (type: int), dayofweek(fl_date) (type: int), weekofyear(fl_date) (type: int), fl_date (type: date), to_date(fl_date) (type: date), date_add(fl_date, 2) (type: date), date_sub(fl_date, 2) (type: date), datediff(fl_date, '2000-01-01') (type: int), datediff(fl_date, 2000-01-01) (type: int), datediff(fl_date, 2000-01-01 00:00:00.0) (type: int), datediff(fl_date, 2000-01-01 11:13:09.0) (type: int), datediff(fl_date, '2007-03-14') (type: int), datediff(fl_date, 2007-03-14) (type: int), datediff(fl_date, 2007-03-14 00:00:00.0) (type: int), datediff(fl_date, 2007-03-14 08:21:59.0) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 - ListSink + ListSink PREHOOK: query: SELECT fl_date, @@ -697,7 +755,7 @@ fl_date _c1 _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 _c10 _c11 _c12 _c13 _c14 _c15 _c16 _ 2010-10-31 1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 3956 3956 3956 1327 1327 1327 1327 2010-10-31 1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 3956 3956 3956 1327 1327 1327 1327 2010-10-31 1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 3956 3956 3956 1327 1327 1327 1327 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_time, fl_date, year(fl_time) = year(fl_date), @@ -722,7 +780,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(fl_date, "2007-03-14") = datediff(fl_date, date "2007-03-14") FROM date_udf_flight_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_time, fl_date, year(fl_time) = year(fl_date), @@ -748,20 +806,49 @@ POSTHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: date_udf_flight_orc + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: fl_time (type: timestamp), fl_date (type: date), (year(fl_time) = year(fl_date)) (type: boolean), (month(fl_time) = month(fl_date)) (type: boolean), (day(fl_time) = day(fl_date)) (type: boolean), (dayofmonth(fl_time) = dayofmonth(fl_date)) (type: boolean), (dayofweek(fl_time) = dayofweek(fl_date)) (type: boolean), (weekofyear(fl_time) = weekofyear(fl_date)) (type: boolean), (CAST( fl_time AS DATE) = fl_date) (type: boolean), (to_date(fl_time) = to_date(fl_date)) (type: boolean), (date_add(fl_time, 2) = date_add(fl_date, 2)) (type: boolean), (date_sub(fl_time, 2) = date_sub(fl_date, 2)) (type: boolean), (datediff(fl_time, '2000-01-01') = datediff(fl_date, '2000-01-01')) (type: boolean), (datediff(fl_time, 2000-01-01) = datediff(fl_date, 2000-01-01)) (type: boolean), (datediff(fl_time, 2000-01-01 00:00:00.0) = datediff(fl_date, 2000-01-01 00:00:00.0)) (type: boolean), (datediff(fl_time, 
2000-01-01 11:13:09.0) = datediff(fl_date, 2000-01-01 11:13:09.0)) (type: boolean), (datediff(fl_time, '2007-03-14') = datediff(fl_date, '2007-03-14')) (type: boolean), (datediff(fl_time, 2007-03-14) = datediff(fl_date, 2007-03-14)) (type: boolean), (datediff(fl_time, 2007-03-14 00:00:00.0) = datediff(fl_date, 2007-03-14 00:00:00.0)) (type: boolean), (datediff(fl_time, 2007-03-14 08:21:59.0) = datediff(fl_date, 2007-03-14 08:21:59.0)) (type: boolean), (datediff(fl_date, '2000-01-01') = datediff(fl_date, 2000-01-01)) (type: boolean), (datediff(fl_date, '2007-03-14') = datediff(fl_date, 2007-03-14)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: java.lang.NullPointerException + vectorized: false + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: date_udf_flight_orc - Select Operator - expressions: fl_time (type: timestamp), fl_date (type: date), (year(fl_time) = year(fl_date)) (type: boolean), (month(fl_time) = month(fl_date)) (type: boolean), (day(fl_time) = day(fl_date)) (type: boolean), (dayofmonth(fl_time) = dayofmonth(fl_date)) (type: boolean), (dayofweek(fl_time) = dayofweek(fl_date)) (type: boolean), (weekofyear(fl_time) = weekofyear(fl_date)) (type: boolean), (CAST( fl_time AS DATE) = fl_date) (type: boolean), (to_date(fl_time) = to_date(fl_date)) (type: boolean), (date_add(fl_time, 2) = date_add(fl_date, 2)) (type: boolean), (date_sub(fl_time, 2) = date_sub(fl_date, 2)) (type: boolean), (datediff(fl_time, '2000-01-01') = datediff(fl_date, '2000-01-01')) (type: boolean), (datediff(fl_time, 2000-01-01) = datediff(fl_date, 2000-01-01)) (type: boolean), (datediff(fl_time, 2000-01-01 00:00:00.0) = datediff(fl_date, 2000-01-01 00:00:00.0)) (type: boolean), (datediff(fl_time, 2000-01-01 11:13:09.0) = datediff(fl_date, 2000-01-01 11:13:09.0)) (type: boolean), (datediff(fl_time, '2007-03-14') = datediff(fl_date, '2007-03-14')) (type: boolean), (datediff(fl_time, 2007-03-14) = datediff(fl_date, 2007-03-14)) (type: boolean), (datediff(fl_time, 2007-03-14 00:00:00.0) = datediff(fl_date, 2007-03-14 00:00:00.0)) (type: boolean), (datediff(fl_time, 2007-03-14 08:21:59.0) = datediff(fl_date, 2007-03-14 08:21:59.0)) (type: boolean), (datediff(fl_date, '2000-01-01') = datediff(fl_date, 2000-01-01)) (type: boolean), (datediff(fl_date, '2007-03-14') = datediff(fl_date, 2007-03-14)) (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - ListSink + ListSink PREHOOK: query: SELECT fl_time, @@ -955,7 +1042,7 @@ fl_time fl_date _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 _c10 _c11 _c12 _c13 _c14 
_c15 _c 2010-10-31 07:00:00 2010-10-31 true true true true true true true true true true true true true true true true true true true true 2010-10-31 07:00:00 2010-10-31 true true true true true true true true true true true true true true true true true true true true 2010-10-31 07:00:00 2010-10-31 true true true true true true true true true true true true true true true true true true true true -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_date, to_date(date_add(fl_date, 2)), to_date(date_sub(fl_date, 2)), @@ -964,7 +1051,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) FROM date_udf_flight_orc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_date, to_date(date_add(fl_date, 2)), to_date(date_sub(fl_date, 2)), @@ -974,22 +1061,68 @@ POSTHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc LIMIT 10 POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: date_udf_flight_orc + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: fl_date (type: date), to_date(date_add(fl_date, 2)) (type: date), to_date(date_sub(fl_date, 2)) (type: date), datediff(fl_date, date_add(fl_date, 2)) (type: int), datediff(fl_date, date_sub(fl_date, 2)) (type: int), datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3, 4, 5, 6, 8] + selectExpressions: VectorUDFDateLong(col 2)(children: VectorUDFDateAddColScalar(col 0, val 2) -> 2:date) -> 3:date, VectorUDFDateLong(col 2)(children: VectorUDFDateSubColScalar(col 0, val 2) -> 2:date) -> 4:date, VectorUDFDateDiffColCol(col 0, col 2)(children: VectorUDFDateAddColScalar(col 0, val 2) -> 2:date) -> 5:long, VectorUDFDateDiffColCol(col 0, col 2)(children: VectorUDFDateSubColScalar(col 0, val 2) -> 2:date) -> 6:long, VectorUDFDateDiffColCol(col 2, col 7)(children: VectorUDFDateAddColScalar(col 0, val 2) -> 2:date, VectorUDFDateSubColScalar(col 0, val 2) -> 7:date) -> 8:long + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: date_udf_flight_orc - Select Operator - expressions: fl_date (type: date), to_date(date_add(fl_date, 2)) (type: date), to_date(date_sub(fl_date, 2)) (type: date), datediff(fl_date, date_add(fl_date, 2)) (type: int), datediff(fl_date, date_sub(fl_date, 2)) (type: int), datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Limit - Number of rows: 10 - ListSink + ListSink PREHOOK: query: SELECT fl_date, @@ -1034,7 +1167,7 @@ POSTHOOK: Input: default@date_udf_flight_orc #### A masked pattern was here #### _c0 2009-07-30 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT min(fl_date) AS c1, max(fl_date), count(fl_date), @@ -1042,7 +1175,7 @@ PREHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT min(fl_date) AS c1, max(fl_date), count(fl_date), @@ -1051,6 +1184,10 @@ FROM date_udf_flight_orc ORDER BY c1 POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1069,43 +1206,101 @@ STAGE PLANS: TableScan alias: date_udf_flight_orc Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: fl_date (type: date) outputColumnNames: fl_date + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(fl_date), max(fl_date), count(fl_date), count() + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> date, VectorUDAFMaxLong(col 0) -> date, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> date, VectorUDAFMaxLong(col 1) -> date, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out index 433bcba..6000ec4 100644 --- ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out @@ -16,10 +16,14 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dtest POSTHOOK: Lineage: dtest.a SCRIPT [] POSTHOOK: Lineage: dtest.b SIMPLE [] -PREHOOK: query: explain select sum(distinct a), count(distinct a) from dtest +PREHOOK: query: explain vectorization select sum(distinct a), count(distinct a) from dtest PREHOOK: type: QUERY -POSTHOOK: query: explain select sum(distinct a), count(distinct a) from dtest +POSTHOOK: query: explain vectorization select sum(distinct a), count(distinct a) from dtest POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -57,8 +61,23 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col1 (type: 
bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) @@ -88,10 +107,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dtest #### A masked pattern was here #### 300 1 -PREHOOK: query: explain select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +PREHOOK: query: explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +POSTHOOK: query: explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -126,8 +149,23 @@ STAGE PLANS: Statistics: Num rows: 5775 Data size: 17248 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -145,6 +183,11 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: struct), _col3 (type: struct) Reducer 3 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1), avg(VALUE._col2), std(VALUE._col3) diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index 203ded8..0b9be24 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -34,10 +34,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked 
pattern was here #### 11 12 -PREHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds +PREHOOK: query: EXPLAIN VECTORIZATION create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds PREHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds +POSTHOOK: query: EXPLAIN VECTORIZATION create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds POSTHOOK: type: CREATETABLE_AS_SELECT +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -74,8 +78,19 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -198,10 +213,14 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@srcpart_double_hour POSTHOOK: Lineage: srcpart_double_hour.hour SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ] POSTHOOK: Lineage: srcpart_double_hour.hr EXPRESSION [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ] -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -232,6 +251,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -267,6 +290,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -288,6 +319,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 
3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -327,10 +365,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_date #### A masked pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -361,6 +403,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -381,6 +427,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -402,6 +456,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -454,12 +515,16 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -491,6 +556,10 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 5 Map Operator Tree: TableScan @@ -526,6 +595,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 6 Map Operator Tree: TableScan @@ -561,6 +638,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -598,6 +683,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -641,12 +733,16 @@ POSTHOOK: Input: default@srcpart_date POSTHOOK: Input: default@srcpart_hour #### A masked pattern was here #### 500 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -679,6 +775,10 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 5 Map Operator Tree: TableScan @@ -699,6 +799,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic 
stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 6 Map Operator Tree: TableScan @@ -719,6 +827,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -756,6 +872,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -810,10 +933,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### 500 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -843,6 +970,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 736000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -893,6 +1024,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -914,6 +1053,13 @@ STAGE PLANS: value expressions: _col0 
(type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -953,10 +1099,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_date_hour #### A masked pattern was here #### 500 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -987,6 +1137,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 736000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -1007,6 +1161,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1028,6 +1190,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1078,10 +1247,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### 500 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' PREHOOK: type: QUERY 
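
NOTE: Each Map Vectorization / Reduce Vectorization block in these baselines reports an enabled flag plus the preconditions that were met or not met, rendered as "<property> IS <value>" (e.g. hive.vectorized.use.vector.serde.deserialize IS false for the TextInputFormat maps). A hypothetical sketch of how such a summary could be assembled, not the actual Vectorizer code:

import java.util.ArrayList;
import java.util.List;

public class VectorizationConditions {
  private final List<String> met = new ArrayList<>();
  private final List<String> notMet = new ArrayList<>();

  // Record one precondition: render it as "<name> IS <actual>" and bucket it
  // by whether the actual value matches the required one.
  public void check(String name, boolean required, boolean actual) {
    (actual == required ? met : notMet).add(name + " IS " + actual);
  }

  public String summary() {
    StringBuilder sb = new StringBuilder("enabled: " + notMet.isEmpty() + "\n");
    if (!met.isEmpty()) {
      sb.append("enabledConditionsMet: ").append(String.join(", ", met)).append('\n');
    }
    if (!notMet.isEmpty()) {
      sb.append("enabledConditionsNotMet: ").append(String.join(", ", notMet)).append('\n');
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    VectorizationConditions c = new VectorizationConditions();
    // Mirrors the TextInputFormat maps in this file: vector serde deserialize is off.
    c.check("hive.vectorized.use.vector.serde.deserialize", true, false);
    System.out.print(c.summary());
    // Prints:
    //   enabled: false
    //   enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
  }
}
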
-POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1112,6 +1285,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -1147,6 +1324,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1168,6 +1353,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1207,10 +1399,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_date #### A masked pattern was here #### 0 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1241,6 +1437,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -1261,6 +1461,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1282,6 +1490,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1330,10 +1545,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart #### A masked pattern was here #### 0 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1364,6 +1583,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -1399,6 +1622,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1420,6 +1651,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1459,10 +1697,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_double_hour #### A masked pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on 
(srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1493,6 +1735,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -1528,6 +1774,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1549,6 +1803,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1588,10 +1849,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_double_hour #### A masked pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1622,6 +1887,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -1642,6 +1911,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1663,6 +1940,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1702,10 +1986,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_double_hour #### A masked pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1736,6 +2024,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -1756,6 +2048,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1777,6 +2077,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1829,10 +2136,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 #### A masked pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11 +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where 
srcpart_double_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11 +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1863,6 +2174,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -1898,6 +2213,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1919,6 +2242,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1972,10 +2302,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 #### A masked pattern was here #### 1000 Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2003,6 +2337,10 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 94000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -2023,6 +2361,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 94 Basic 
stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2044,6 +2386,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2059,6 +2408,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -2105,10 +2461,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### 1000 Warning: Shuffle Join MERGEJOIN[16][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2137,6 +2497,10 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -2156,6 +2520,14 @@ STAGE PLANS: value expressions: _col0 (type: string), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2183,6 +2555,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 
Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2223,10 +2602,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_date_hour #### A masked pattern was here #### 1500 -PREHOOK: query: EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2256,6 +2639,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 736000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -2306,6 +2693,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2327,6 +2722,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2366,10 +2768,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_date_hour #### A masked pattern was here #### 500 -PREHOOK: query: EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart left join srcpart_date on 
(srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2400,6 +2806,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -2435,6 +2845,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2456,6 +2874,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2476,10 +2901,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2528,6 +2957,14 @@ STAGE PLANS: Target Vertex: Map 4 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -2544,6 +2981,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2565,6 +3006,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 
Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2585,10 +3033,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2618,6 +3070,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -2653,6 +3109,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2674,6 +3138,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2694,12 +3165,16 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: 
+ enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2731,6 +3206,10 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 5 Map Operator Tree: TableScan @@ -2766,6 +3245,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 6 Map Operator Tree: TableScan @@ -2801,6 +3288,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2838,6 +3333,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2877,12 +3379,16 @@ POSTHOOK: Input: default@srcpart_date POSTHOOK: Input: default@srcpart_hour #### A masked pattern was here #### 500 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2920,6 +3426,12 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: unknown + Map Vectorization: + enabled: true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Map 6 Map Operator Tree: TableScan @@ -2940,6 +3452,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + 
groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 7 Map Operator Tree: TableScan @@ -2958,8 +3478,14 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: java.lang.NullPointerException + vectorized: false Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -3010,6 +3536,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3025,6 +3558,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) @@ -3057,10 +3597,14 @@ POSTHOOK: Input: default@srcpart_date POSTHOOK: Input: default@srcpart_hour #### A masked pattern was here #### 0 -PREHOOK: query: EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3094,6 +3638,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -3114,6 +3662,10 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 8 Map Operator Tree: TableScan @@ -3134,6 +3686,10 @@ 
STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -3155,6 +3711,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3170,6 +3733,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) @@ -3191,6 +3761,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reducer 7 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -3219,6 +3796,13 @@ STAGE PLANS: Target Vertex: Map 1 Reducer 9 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -3264,10 +3848,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 2000 -PREHOOK: query: EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +PREHOOK: query: EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +POSTHOOK: query: EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3301,6 +3889,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: 
org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -3321,6 +3913,10 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 8 Map Operator Tree: TableScan @@ -3341,6 +3937,10 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -3364,6 +3964,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -3379,6 +3986,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) @@ -3400,6 +4014,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reducer 7 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -3428,6 +4049,13 @@ STAGE PLANS: Target Vertex: Map 1 Reducer 9 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -3474,10 +4102,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 2008-04-08 2008-04-09 -PREHOOK: query: EXPLAIN select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +PREHOOK: query: EXPLAIN VECTORIZATION select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +POSTHOOK: query: EXPLAIN VECTORIZATION select ds from 
(select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3513,6 +4145,10 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 11 Map Operator Tree: TableScan @@ -3533,6 +4169,10 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 5 Map Operator Tree: TableScan @@ -3551,6 +4191,10 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 7 Map Operator Tree: TableScan @@ -3571,8 +4215,19 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 10 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -3616,6 +4271,13 @@ STAGE PLANS: Target Vertex: Map 5 Reducer 12 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -3637,6 +4299,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -3668,6 +4337,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -3681,6 
+4357,13 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE Reducer 8 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) @@ -3731,10 +4414,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 2008-04-08 2008-04-09 2008-04-09 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3778,6 +4465,10 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -3813,8 +4504,23 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3867,12 +4573,16 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 POSTHOOK: type: QUERY +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3925,6 +4635,10 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -3960,6 +4674,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -3995,8 +4717,23 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4051,10 +4788,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### 500 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4097,6 +4838,10 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -4147,8 +4892,23 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4199,10 +4959,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### 500 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4246,6 +5010,10 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -4281,8 +5049,23 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4322,10 +5105,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_date #### A masked pattern was here #### 0 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where 
srcpart_double_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4369,6 +5156,10 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -4404,8 +5195,23 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4445,10 +5251,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_double_hour #### A masked pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4492,6 +5302,10 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -4527,8 +5341,23 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4582,10 +5411,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 
#### A masked pattern was here #### 1000 Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Reducer 3' is a cross product -PREHOOK: query: EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4612,6 +5445,10 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 2 Map Operator Tree: TableScan @@ -4632,8 +5469,19 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -4662,6 +5510,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4709,10 +5564,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled 
IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4756,6 +5615,10 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -4791,8 +5654,23 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4813,10 +5691,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4863,6 +5745,14 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -4879,8 +5769,19 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4901,10 +5802,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from 
srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4934,6 +5839,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 2 Map Operator Tree: TableScan @@ -4967,8 +5876,23 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4989,12 +5913,16 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5047,6 +5975,10 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -5082,6 +6014,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -5117,8 +6057,23 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -5158,12 +6113,16 @@ POSTHOOK: Input: default@srcpart_date POSTHOOK: Input: default@srcpart_hour #### A masked pattern was here #### 500 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5198,6 +6157,12 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: unknown + Map Vectorization: + enabled: true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Map 2 Map Operator Tree: TableScan @@ -5241,6 +6206,14 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -5261,8 +6234,23 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -5298,10 +6286,14 @@ POSTHOOK: Input: default@srcpart_date POSTHOOK: Input: default@srcpart_hour #### A masked pattern was here #### 0 -PREHOOK: query: EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +PREHOOK: query: EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +POSTHOOK: query: EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5334,6 +6326,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 2 Map Operator Tree: TableScan @@ -5354,6 +6350,10 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 7 Map Operator Tree: TableScan @@ -5374,8 +6374,19 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) @@ -5397,6 +6408,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -5425,6 +6443,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reducer 6 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -5440,6 +6465,13 @@ STAGE PLANS: serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 8 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -5521,10 +6553,14 @@ POSTHOOK: Lineage: srcpart_orc PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(s POSTHOOK: Lineage: srcpart_orc PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: srcpart_orc PARTITION(ds=2008-04-09,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcpart_orc PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09') +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09') PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09') +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09') POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out index 29f2391..e6b65da 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out @@ -24,10 +24,14 @@ POSTHOOK: Output: default@dsrv_small POSTHOOK: Lineage: dsrv_small.key_int EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dsrv_small.key_str SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dsrv_small.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) +POSTHOOK: query: EXPLAIN 
VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -49,53 +53,114 @@ STAGE PLANS: alias: a filterExpr: (key_int is not null and key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter))) (type: boolean) Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1) -> boolean, FilterLongColumnBetweenDynamicValue(col 1, left 0, right 0) -> boolean, VectorInBloomFilterColDynamicValue -> boolean) -> boolean predicate: (key_int is not null and key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter))) (type: boolean) Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key_int (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan alias: b filterExpr: key_int is not null (type: boolean) Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: key_int is not null (type: boolean) Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key_int (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilter(col 1) -> binary + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2] mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -108,6 +173,10 @@ STAGE PLANS: Statistics: Num rows: 550 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -117,14 +186,30 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -132,14 
+217,32 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilterMerge(col 2) -> binary + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2] mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) @@ -160,10 +263,14 @@ POSTHOOK: Input: default@dsrv_big POSTHOOK: Input: default@dsrv_small #### A masked pattern was here #### 84 -PREHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -197,41 +304,85 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: java.lang.NullPointerException + vectorized: false Map 4 Map Operator Tree: TableScan alias: b filterExpr: key_str is not null (type: boolean) Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key_str is not null (type: boolean) Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key_str (type: string) outputColumnNames: _col0 + Select Vectorization: + 
className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) + Group By Vectorization: + aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 0) -> string, VectorUDAFBloomFilter(col 0) -> binary + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2] mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -244,6 +395,10 @@ STAGE PLANS: Statistics: Num rows: 550 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -253,14 +408,30 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] 
mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -268,14 +439,32 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) + Group By Vectorization: + aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 1) -> string, VectorUDAFBloomFilterMerge(col 2) -> binary + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2] mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) @@ -296,10 +485,14 @@ POSTHOOK: Input: default@dsrv_big POSTHOOK: Input: default@dsrv_small #### A masked pattern was here #### 84 -PREHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -333,41 +526,85 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: java.lang.NullPointerException + vectorized: false Map 4 Map Operator Tree: TableScan alias: b filterExpr: key_str is not null (type: boolean) Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + 
projectedOutputColumns: [0, 1, 2] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key_str is not null (type: boolean) Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key_str (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) + Group By Vectorization: + aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 0) -> string, VectorUDAFBloomFilter(col 0) -> binary + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2] mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -380,6 +617,10 @@ STAGE PLANS: Statistics: Num rows: 550 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -389,14 +630,30 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -404,14 +661,32 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) + Group By Vectorization: + aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 1) -> string, VectorUDAFBloomFilterMerge(col 2) -> binary + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2] mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) @@ -432,10 +707,14 @@ POSTHOOK: Input: default@dsrv_big POSTHOOK: Input: default@dsrv_small #### A masked pattern was here #### 84 -PREHOOK: query: EXPLAIN select count(*) from dsrv_big a, dsrv_small b, dsrv_small c where a.key_int = b.key_int and a.key_int = c.key_int +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a, dsrv_small b, dsrv_small c where a.key_int = b.key_int and a.key_int = c.key_int PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from dsrv_big a, dsrv_small b, dsrv_small c where a.key_int = b.key_int and a.key_int = c.key_int +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a, dsrv_small b, dsrv_small c where a.key_int = b.key_int and a.key_int = c.key_int POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -458,86 +737,185 @@ STAGE PLANS: alias: a filterExpr: (key_int is not null and key_int BETWEEN DynamicValue(RS_10_b_key_int_min) AND DynamicValue(RS_10_b_key_int_max) and key_int 
BETWEEN DynamicValue(RS_11_c_key_int_min) AND DynamicValue(RS_11_c_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_10_b_key_int_bloom_filter)) and in_bloom_filter(key_int, DynamicValue(RS_11_c_key_int_bloom_filter))) (type: boolean) Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1) -> boolean, FilterLongColumnBetweenDynamicValue(col 1, left 0, right 0) -> boolean, FilterLongColumnBetweenDynamicValue(col 1, left 0, right 0) -> boolean, VectorInBloomFilterColDynamicValue -> boolean, VectorInBloomFilterColDynamicValue -> boolean) -> boolean predicate: (key_int is not null and key_int BETWEEN DynamicValue(RS_10_b_key_int_min) AND DynamicValue(RS_10_b_key_int_max) and key_int BETWEEN DynamicValue(RS_11_c_key_int_min) AND DynamicValue(RS_11_c_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_10_b_key_int_bloom_filter)) and in_bloom_filter(key_int, DynamicValue(RS_11_c_key_int_bloom_filter))) (type: boolean) Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key_int (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan alias: b filterExpr: key_int is not null (type: boolean) Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: key_int is not null (type: boolean) Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key_int (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilter(col 1) -> binary + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2] mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 6 Map Operator Tree: TableScan alias: c filterExpr: key_int is not null (type: boolean) Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: key_int is not null (type: boolean) Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key_int (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) 
outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilter(col 1) -> binary + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2] mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -552,6 +930,10 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 198000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -561,14 +943,30 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -576,26 +974,62 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: 
min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilterMerge(col 2) -> binary + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2] mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 7 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilterMerge(col 2) -> binary + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2] mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) @@ -616,10 +1050,14 @@ POSTHOOK: Input: default@dsrv_big POSTHOOK: Input: default@dsrv_small #### A masked pattern was here #### 84 -PREHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str and a.key_int = b.key_int) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str and a.key_int = b.key_int) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str and a.key_int = b.key_int) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str and a.key_int = b.key_int) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -654,54 +1092,113 @@ STAGE PLANS: sort order: ++ Map-reduce partition 
columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: java.lang.NullPointerException + vectorized: false Map 4 Map Operator Tree: TableScan alias: b filterExpr: (key_str is not null and key_int is not null) (type: boolean) Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: (key_str is not null and key_int is not null) (type: boolean) Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key_str (type: string), key_int (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) + Group By Vectorization: + aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 0) -> string, VectorUDAFBloomFilter(col 0) -> binary + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2] mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Select Operator expressions: 
_col1 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilter(col 1) -> binary + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2] mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -714,6 +1211,10 @@ STAGE PLANS: Statistics: Num rows: 550 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -723,14 +1224,30 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -738,26 +1255,62 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator 
aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) + Group By Vectorization: + aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 1) -> string, VectorUDAFBloomFilterMerge(col 2) -> binary + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2] mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reducer 6 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilterMerge(col 2) -> binary + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2] mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) @@ -778,10 +1331,14 @@ POSTHOOK: Input: default@dsrv_big POSTHOOK: Input: default@dsrv_small #### A masked pattern was here #### 84 -PREHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) where b.value in ('nonexistent1', 'nonexistent2') +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) where b.value in ('nonexistent1', 'nonexistent2') PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) where b.value in ('nonexistent1', 'nonexistent2') +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) where b.value in ('nonexistent1', 'nonexistent2') POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: 
Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -803,53 +1360,114 @@ STAGE PLANS: alias: a filterExpr: (key_int is not null and key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter))) (type: boolean) Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1) -> boolean, FilterLongColumnBetweenDynamicValue(col 1, left 0, right 0) -> boolean, VectorInBloomFilterColDynamicValue -> boolean) -> boolean predicate: (key_int is not null and key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter))) (type: boolean) Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key_int (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan alias: b filterExpr: ((value) IN ('nonexistent1', 'nonexistent2') and key_int is not null) (type: boolean) Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterStringColumnInList(col 2, values 11011111010112010511511610111011649, 11011111010112010511511610111011650) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: ((value) IN ('nonexistent1', 'nonexistent2') and key_int is not null) (type: boolean) Statistics: Num rows: 29 Data size: 5162 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key_int (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 29 Data size: 5162 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: 
true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 29 Data size: 5162 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 29 Data size: 5162 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=29) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilter(col 1) -> binary + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2] mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -862,6 +1480,10 @@ STAGE PLANS: Statistics: Num rows: 550 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -871,14 +1493,30 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ 
-886,14 +1524,32 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=29) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilterMerge(col 2) -> binary + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2] mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out index d47a04d..33d2e3d 100644 --- ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out @@ -1,11 +1,15 @@ -PREHOOK: query: EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -24,12 +28,23 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -37,6 +52,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 
1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -44,9 +63,21 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 2, 12] + selectExpressions: LongColAddLongColumn(col 2, col 2) -> 12:long Statistics: Num rows: 19518 Data size: 156144 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) + Group By Vectorization: + aggregators: VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxLong(col 2) -> int, VectorUDAFMinLong(col 2) -> int, VectorUDAFAvgLong(col 12) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 12) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE @@ -56,27 +87,63 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out new file mode 100644 index 0000000..7621cbe --- /dev/null +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out @@ -0,0 +1,210 @@ +PREHOOK: query: create temporary table x (a int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@x +POSTHOOK: query: create temporary table x (a int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@x +PREHOOK: query: create temporary table y (b int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@y +POSTHOOK: query: create temporary table y (b int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@y +PREHOOK: query: insert into x values(1) +PREHOOK: type: QUERY +PREHOOK: Output: default@x +POSTHOOK: query: insert into x values(1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@x +POSTHOOK: Lineage: x.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into y values(1) +PREHOOK: type: QUERY +PREHOOK: Output: default@y +POSTHOOK: query: insert into y values(1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@y +POSTHOOK: Lineage: y.b EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: explain vectorization expression +select count(1) from x, y where a = b +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count(1) from x, y where a = b +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 45 Data size: 181 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean + predicate: a is not null (type: boolean) + Statistics: Num rows: 45 Data size: 181 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumns: [0] + Statistics: Num rows: 45 Data size: 181 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Fast Hash Table and No Hybrid Hash Join IS true + input vertices: + 1 Map 3 + Statistics: Num rows: 49 Data size: 199 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 1:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 45 Data size: 181 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean + predicate: b is not null (type: boolean) + Statistics: Num rows: 45 Data size: 181 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: b (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 45 Data size: 181 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 45 Data size: 181 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(1) from x, y where a = b +PREHOOK: type: QUERY +PREHOOK: Input: default@x +PREHOOK: Input: default@y +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from x, y where a = b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@x +POSTHOOK: Input: default@y +#### A masked pattern was here #### +1 diff --git ql/src/test/results/clientpositive/llap/vectorized_math_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_math_funcs.q.out index da862b9..5901b09 100644 --- ql/src/test/results/clientpositive/llap/vectorized_math_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_math_funcs.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select cdouble ,Round(cdouble, 2) @@ -50,7 +50,7 @@ where cbigint % 500 = 0 and sin(cfloat) >= -1.0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select cdouble ,Round(cdouble, 2) @@ -102,22 +102,69 @@ where cbigint % 500 = 0 and sin(cfloat) >= -1.0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 293580 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 3, val 500) -> 12:long) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 13, val -1.0)(children: FuncSinDoubleToDouble(col 4) -> 13:double) -> boolean) -> boolean + predicate: (((cbigint % 500) = 0) and (sin(cfloat) >= -1.0)) (type: boolean) + Statistics: Num rows: 2048 Data size: 
48960 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cdouble (type: double), round(cdouble, 2) (type: double), floor(cdouble) (type: bigint), ceil(cdouble) (type: bigint), rand() (type: double), rand(98007) (type: double), exp(ln(cdouble)) (type: double), ln(cdouble) (type: double), ln(cfloat) (type: double), log10(cdouble) (type: double), log2(cdouble) (type: double), log2((cdouble - 15601.0)) (type: double), log2(cfloat) (type: double), log2(cbigint) (type: double), log2(cint) (type: double), log2(csmallint) (type: double), log2(ctinyint) (type: double), log(2, cdouble) (type: double), power(log2(cdouble), 2) (type: double), power(log2(cdouble), 2) (type: double), sqrt(cdouble) (type: double), sqrt(cbigint) (type: double), bin(cbigint) (type: string), hex(cdouble) (type: string), conv(cbigint, 10, 16) (type: string), abs(cdouble) (type: double), abs(ctinyint) (type: int), (cint pmod 3) (type: int), sin(cdouble) (type: double), asin(cdouble) (type: double), cos(cdouble) (type: double), acos(cdouble) (type: double), atan(cdouble) (type: double), degrees(cdouble) (type: double), radians(cdouble) (type: double), cdouble (type: double), cbigint (type: bigint), (- cdouble) (type: double), sign(cdouble) (type: double), sign(cbigint) (type: double), cos(((- sin(log(cdouble))) + 3.14159)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 13, 12, 14, 15, 16, 18, 17, 19, 20, 21, 23, 22, 24, 25, 26, 27, 28, 30, 31, 29, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 5, 3, 46, 47, 48, 49] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 5, decimalPlaces 2) -> 13:double, FuncFloorDoubleToLong(col 5) -> 12:long, FuncCeilDoubleToLong(col 5) -> 14:long, FuncRandNoSeed -> 15:double, FuncRand -> 16:double, FuncExpDoubleToDouble(col 17)(children: FuncLnDoubleToDouble(col 5) -> 17:double) -> 18:double, FuncLnDoubleToDouble(col 5) -> 17:double, FuncLnDoubleToDouble(col 4) -> 19:double, FuncLog10DoubleToDouble(col 5) -> 20:double, FuncLog2DoubleToDouble(col 5) -> 21:double, FuncLog2DoubleToDouble(col 22)(children: DoubleColSubtractDoubleScalar(col 5, val 15601.0) -> 22:double) -> 23:double, FuncLog2DoubleToDouble(col 4) -> 22:double, FuncLog2LongToDouble(col 3) -> 24:double, FuncLog2LongToDouble(col 2) -> 25:double, FuncLog2LongToDouble(col 1) -> 26:double, FuncLog2LongToDouble(col 0) -> 27:double, VectorUDFAdaptor(log(2, cdouble)) -> 28:double, VectorUDFAdaptor(power(log2(cdouble), 2))(children: FuncLog2DoubleToDouble(col 5) -> 29:double) -> 30:double, VectorUDFAdaptor(power(log2(cdouble), 2))(children: FuncLog2DoubleToDouble(col 5) -> 29:double) -> 31:double, FuncSqrtDoubleToDouble(col 5) -> 29:double, FuncSqrtLongToDouble(col 3) -> 32:double, FuncBin(col 3) -> 33:String, VectorUDFAdaptor(hex(cdouble)) -> 34:string, VectorUDFAdaptor(conv(cbigint, 10, 16)) -> 35:string, FuncAbsDoubleToDouble(col 5) -> 36:double, FuncAbsLongToLong(col 0) -> 37:long, PosModLongToLong(col 2, divisor 3) -> 38:long, FuncSinDoubleToDouble(col 5) -> 39:double, FuncASinDoubleToDouble(col 5) -> 40:double, FuncCosDoubleToDouble(col 5) -> 41:double, FuncACosDoubleToDouble(col 5) -> 
42:double, FuncATanDoubleToDouble(col 5) -> 43:double, FuncDegreesDoubleToDouble(col 5) -> 44:double, FuncRadiansDoubleToDouble(col 5) -> 45:double, DoubleColUnaryMinus(col 5) -> 46:double, FuncSignDoubleToDouble(col 5) -> 47:double, FuncSignLongToDouble(col 3) -> 48:double, FuncCosDoubleToDouble(col 50)(children: DoubleColAddDoubleScalar(col 49, val 3.14159)(children: DoubleColUnaryMinus(col 50)(children: FuncSinDoubleToDouble(col 49)(children: FuncLnDoubleToDouble(col 5) -> 49:double) -> 50:double) -> 49:double) -> 50:double) -> 49:double + Statistics: Num rows: 2048 Data size: 1724272 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2048 Data size: 1724272 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: (((cbigint % 500) = 0) and (sin(cfloat) >= -1.0)) (type: boolean) - Select Operator - expressions: cdouble (type: double), round(cdouble, 2) (type: double), floor(cdouble) (type: bigint), ceil(cdouble) (type: bigint), rand() (type: double), rand(98007) (type: double), exp(ln(cdouble)) (type: double), ln(cdouble) (type: double), ln(cfloat) (type: double), log10(cdouble) (type: double), log2(cdouble) (type: double), log2((cdouble - 15601.0)) (type: double), log2(cfloat) (type: double), log2(cbigint) (type: double), log2(cint) (type: double), log2(csmallint) (type: double), log2(ctinyint) (type: double), log(2, cdouble) (type: double), power(log2(cdouble), 2) (type: double), power(log2(cdouble), 2) (type: double), sqrt(cdouble) (type: double), sqrt(cbigint) (type: double), bin(cbigint) (type: string), hex(cdouble) (type: string), conv(cbigint, 10, 16) (type: string), abs(cdouble) (type: double), abs(ctinyint) (type: int), (cint pmod 3) (type: int), sin(cdouble) (type: double), asin(cdouble) (type: double), cos(cdouble) (type: double), acos(cdouble) (type: double), atan(cdouble) (type: double), degrees(cdouble) (type: double), radians(cdouble) (type: double), cdouble (type: double), cbigint (type: bigint), (- cdouble) (type: double), sign(cdouble) (type: double), sign(cbigint) (type: double), cos(((- sin(log(cdouble))) + 3.14159)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40 - ListSink + ListSink PREHOOK: query: select cdouble diff --git ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out index 5a61859..ed28530 100644 --- ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out +++ 
ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out @@ -1,7 +1,11 @@ -PREHOOK: query: explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint +PREHOOK: query: explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint PREHOOK: type: QUERY -POSTHOOK: query: explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint +POSTHOOK: query: explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -62,6 +66,14 @@ STAGE PLANS: value expressions: _col0 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -82,6 +94,14 @@ STAGE PLANS: value expressions: _col1 (type: smallint), _col2 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -101,8 +121,23 @@ STAGE PLANS: Statistics: Num rows: 9174 Data size: 27400 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out index e42453d..73d9784 100644 --- ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out @@ -46,16 +46,20 @@ POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldS POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ] POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, 
type:string, comment:null), ] POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] -PREHOOK: query: explain select * +PREHOOK: query: explain vectorization select * from alltypes_parquet where cint = 528534767 limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain select * +POSTHOOK: query: explain vectorization select * from alltypes_parquet where cint = 528534767 limit 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -99,7 +103,7 @@ POSTHOOK: Input: default@alltypes_parquet 528534767 27 -7824 27.0 -7824.0 cvLH6Eat2yFsyy7p 528534767 -11 -15431 -11.0 -15431.0 cvLH6Eat2yFsyy7p 528534767 61 -15549 61.0 -15549.0 cvLH6Eat2yFsyy7p -PREHOOK: query: explain select ctinyint, +PREHOOK: query: explain vectorization select ctinyint, max(cint), min(csmallint), count(cstring1), @@ -108,7 +112,7 @@ PREHOOK: query: explain select ctinyint, from alltypes_parquet group by ctinyint PREHOOK: type: QUERY -POSTHOOK: query: explain select ctinyint, +POSTHOOK: query: explain vectorization select ctinyint, max(cint), min(csmallint), count(cstring1), @@ -117,6 +121,10 @@ POSTHOOK: query: explain select ctinyint, from alltypes_parquet group by ctinyint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -152,8 +160,21 @@ STAGE PLANS: value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4) diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out index 5576eb5..e65eb2e 100644 --- ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out @@ -115,11 +115,11 @@ POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] POSTHOOK: Lineage: parquet_types.cvarchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cvarchar, type:varchar(10), comment:null), ] POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression 
SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types POSTHOOK: type: QUERY @@ -165,10 +165,10 @@ POSTHOOK: Input: default@parquet_types 119 2 5 1.4 5.7 fgh 2030-08-08 20:20:20.202020202 vwxyz abcdede 68692CCAC0BDE7 12.83 120 3 1 1.0 6.0 ijk 2031-09-09 21:21:21.212121212 wxyza abcde B4F3CAFDBEDD 73.04 121 1 2 1.1 6.3 lmn 2032-10-10 22:22:22.222222222 bcdef abcde 90.33 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types POSTHOOK: type: QUERY Plan optimized by CBO. @@ -211,7 +211,7 @@ uvwzy 5 abcdede 7 4.76 1 vwxyz 5 abcdede 7 12.83 1 wxyza 5 abcde 5 73.04 1 bcdef 5 abcde 5 90.33 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression SELECT ctinyint, MAX(cint), MIN(csmallint), @@ -223,7 +223,7 @@ FROM parquet_types GROUP BY ctinyint ORDER BY ctinyint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression SELECT ctinyint, MAX(cint), MIN(csmallint), diff --git ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out index b40acf9..18aab49 100644 --- ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out @@ -104,7 +104,7 @@ POSTHOOK: Lineage: part_orc.p_partkey SIMPLE [(part_staging)part_staging.FieldSc POSTHOOK: Lineage: part_orc.p_retailprice SIMPLE [(part_staging)part_staging.FieldSchema(name:p_retailprice, type:double, comment:null), ] POSTHOOK: Lineage: part_orc.p_size SIMPLE [(part_staging)part_staging.FieldSchema(name:p_size, type:int, comment:null), ] POSTHOOK: Lineage: part_orc.p_type SIMPLE [(part_staging)part_staging.FieldSchema(name:p_type, type:string, comment:null), ] -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -114,7 +114,7 @@ from noop(on part_orc order by p_name ) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -124,6 +124,10 @@ from noop(on part_orc order by p_name ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -154,6 +158,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked 
pattern was here #### Path -> Partition: @@ -208,6 +220,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -239,6 +256,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -359,20 +381,24 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -408,6 +434,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -479,6 +513,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -555,6 +597,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), 
KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -586,6 +633,11 @@ STAGE PLANS: Reducer 4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -689,18 +741,22 @@ Manufacturer#5 almond antique medium spring khaki 6 -25 Manufacturer#5 almond antique sky peru orange 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -730,6 +786,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -784,6 +848,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -876,7 +945,7 @@ Manufacturer#5 almond antique medium spring khaki 6 Manufacturer#5 almond antique sky peru orange 2 Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -886,7 +955,7 @@ from noop(on part_orc order by p_name ) abc PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -896,6 +965,10 @@ from noop(on part_orc order by p_name ) abc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -926,6 +999,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -980,6 +1061,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -1011,6 +1097,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -1131,7 +1222,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1141,7 +1232,7 @@ from noop(on part_orc order by p_name ) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1151,6 +1242,10 @@ from noop(on part_orc order by p_name ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1181,6 +1276,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1235,6 +1338,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -1266,6 +1374,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN 
[tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -1387,7 +1500,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 6 -25 Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1398,7 +1511,7 @@ from noop(on part_orc ) group by p_mfgr, p_name, p_size PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1409,6 +1522,10 @@ from noop(on part_orc ) group by p_mfgr, p_name, p_size POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1439,6 +1556,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1493,6 +1618,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -1532,6 +1662,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) @@ -1655,20 +1790,24 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 6 -25 Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select abc.* from noop(on part_orc partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select abc.* from noop(on part_orc partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1699,6 +1838,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1770,6 +1917,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1824,6 +1979,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string) @@ -1942,20 +2102,24 @@ POSTHOOK: Input: default@part_orc 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1989,6 +2153,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2057,6 +2229,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2150,6 +2330,11 @@ STAGE PLANS: Reducer 4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string) @@ -2233,20 +2418,24 @@ POSTHOOK: Input: default@part_orc 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc partition by p_mfgr order by p_name, p_size desc) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc partition by p_mfgr order by p_name, p_size desc) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2292,6 +2481,12 @@ STAGE PLANS: auto parallelism: true Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2346,6 +2541,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) @@ -2377,6 +2577,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) @@ -2478,7 +2683,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 Manufacturer#5 almond antique sky peru orange 2 3 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon 
midnight 23 5 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2487,7 +2692,7 @@ from noopwithmap(on part_orc partition by p_mfgr order by p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2496,6 +2701,10 @@ from noopwithmap(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2542,6 +2751,12 @@ STAGE PLANS: auto parallelism: true Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2596,6 +2811,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -2628,6 +2848,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -2746,7 +2971,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2755,7 +2980,7 @@ from noop(on part_orc partition by p_mfgr order by p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2764,6 +2989,10 @@ from noop(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2794,6 +3023,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP 
IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2848,6 +3085,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -2879,6 +3121,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -2997,7 +3244,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -3007,7 +3254,7 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -3017,6 +3264,10 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3048,6 +3299,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3102,6 +3361,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3156,6 +3420,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: 
+ enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3195,6 +3464,11 @@ STAGE PLANS: Reducer 4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3315,7 +3589,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -3328,7 +3602,7 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -3341,6 +3615,10 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3371,6 +3649,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3425,6 +3711,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3456,6 +3747,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3574,7 +3870,7 @@ Manufacturer#5 almond antique medium spring khaki 2 6208.18 Manufacturer#5 almond antique 
sky peru orange 3 7672.66 Manufacturer#5 almond aquamarine dodger light gainsboro 4 5882.97 Manufacturer#5 almond azure blanched chiffon midnight 5 4271.31 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -3586,7 +3882,7 @@ partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -3598,6 +3894,10 @@ partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3629,6 +3929,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3700,6 +4008,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3754,6 +4070,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3811,6 +4132,11 @@ STAGE PLANS: Reducer 4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3950,18 +4276,22 @@ Manufacturer#5 almond antique medium spring khaki 2 2 2 1611.66 3401.35 6 -25 Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by 
p_mfgr order by p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3992,6 +4322,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4046,6 +4384,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -4085,6 +4428,13 @@ STAGE PLANS: Reducer 3 Execution mode: vectorized, llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) @@ -4180,7 +4530,7 @@ POSTHOOK: Output: default@mfgr_price_view POSTHOOK: Lineage: mfgr_price_view.p_brand SIMPLE [(part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), ] POSTHOOK: Lineage: mfgr_price_view.p_mfgr SIMPLE [(part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), ] POSTHOOK: Lineage: mfgr_price_view.s EXPRESSION [(part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), ] -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_brand, s, round(sum(s) over w1,2) as s1 from noop(on mfgr_price_view @@ -4188,7 +4538,7 @@ partition by p_mfgr order by p_mfgr) window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_brand, s, round(sum(s) over w1,2) as s1 from noop(on mfgr_price_view @@ -4196,6 +4546,10 @@ partition by p_mfgr order by p_mfgr) window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4236,6 +4590,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true 
Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4290,6 +4652,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -4327,6 +4694,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) @@ -4465,7 +4837,7 @@ fv1 INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@part_5 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended from noop(on part_orc partition by p_mfgr order by p_name) @@ -4481,7 +4853,7 @@ cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, first_value(p_size, true) over w1 as fv1 window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended from noop(on part_orc partition by p_mfgr order by p_name) @@ -4497,6 +4869,10 @@ cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, first_value(p_size, true) over w1 as fv1 window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -4533,6 +4909,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4587,6 +4971,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -4627,6 +5016,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -4705,6 +5099,11 @@ STAGE PLANS: Reducer 
4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) @@ -4746,6 +5145,11 @@ STAGE PLANS: Reducer 5 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5014,7 +5418,7 @@ Manufacturer#5 almond antique medium spring khaki 6 8 2 2 0.4 31 Manufacturer#5 almond antique sky peru orange 2 2 3 3 0.6 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 4 4 0.8 6 Manufacturer#5 almond azure blanched chiffon midnight 23 23 5 5 1.0 2 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -5031,7 +5435,7 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -5048,6 +5452,10 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5079,6 +5487,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5133,6 +5549,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5194,6 +5615,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5233,6 +5659,11 @@ STAGE PLANS: Reducer 4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: 
true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5367,7 +5798,7 @@ Manufacturer#5 almond antique medium spring khaki 1 1 6 6 Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5384,7 +5815,7 @@ from noop(on partition by p_mfgr order by p_mfgr ) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5401,6 +5832,10 @@ from noop(on partition by p_mfgr order by p_mfgr ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5433,6 +5868,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5487,6 +5930,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5525,6 +5973,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5556,6 +6009,11 @@ STAGE PLANS: Reducer 4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5587,6 +6045,11 @@ STAGE PLANS: Reducer 5 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + 
notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5721,7 +6184,7 @@ Manufacturer#5 almond antique medium spring khaki 2 2 6 37 Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5736,7 +6199,7 @@ from noop(on partition by p_mfgr order by p_mfgr)) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5751,6 +6214,10 @@ from noop(on partition by p_mfgr order by p_mfgr)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5782,6 +6249,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5836,6 +6311,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5874,6 +6354,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5912,6 +6397,11 @@ STAGE PLANS: Reducer 4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6042,7 +6532,7 @@ Manufacturer#5 almond antique medium spring khaki 2 2 6 37 Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, 
rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -6059,7 +6549,7 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -6076,6 +6566,10 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6108,6 +6602,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6162,6 +6664,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6200,6 +6707,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -6247,6 +6759,11 @@ STAGE PLANS: Reducer 4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6279,6 +6796,11 @@ STAGE PLANS: Reducer 5 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6413,7 +6935,7 @@ Manufacturer#5 almond antique medium spring khaki 1 1 6 6 Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order 
by p_mfgr,p_name) as dr, @@ -6429,7 +6951,7 @@ from noop(on order by p_mfgr )) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -6445,6 +6967,10 @@ from noop(on order by p_mfgr )) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6476,6 +7002,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6530,6 +7064,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6584,6 +7123,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -6623,6 +7167,11 @@ STAGE PLANS: Reducer 4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6755,7 +7304,7 @@ Manufacturer#5 almond antique medium spring khaki 1 1 6 6 6 Manufacturer#5 almond antique sky peru orange 1 1 2 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 23 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6769,7 +7318,7 @@ from noopwithmap(on order by p_mfgr, p_name) )) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6783,6 +7332,10 @@ from noopwithmap(on order by p_mfgr, p_name) )) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 
depends on stages: Stage-1 @@ -6814,6 +7367,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6868,6 +7429,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6922,6 +7488,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6954,6 +7525,11 @@ STAGE PLANS: Reducer 4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) diff --git ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out index 0f900fa..e7c26fa 100644 --- ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out @@ -1,11 +1,15 @@ -PREHOOK: query: EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -25,39 +29,85 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) Statistics: Num 
rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -75,6 +125,10 @@ STAGE PLANS: Statistics: Num rows: 19518 Data size: 156144 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE @@ -84,6 +138,11 @@ STAGE 
PLANS: value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Reducer 3 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) @@ -97,13 +156,27 @@ STAGE PLANS: value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out index 560235d..8e7e313 100644 --- ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization select substr(cstring1, 1, 2) ,substr(cstring1, 2) @@ -20,7 +20,7 @@ where cbigint % 237 = 0 and length(substr(cstring1, 1, 2)) <= 2 and cstring1 like '%' PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select substr(cstring1, 1, 2) ,substr(cstring1, 2) @@ -42,22 +42,54 @@ where cbigint % 237 = 0 and length(substr(cstring1, 1, 2)) <= 2 and cstring1 like '%' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 1816546 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((cbigint % 237) = 0) and (length(substr(cstring1, 1, 2)) <= 2) and (cstring1 like '%')) (type: boolean) + Statistics: Num rows: 1024 Data size: 151470 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(cstring1, 1, 2) (type: string), substr(cstring1, 2) (type: string), lower(cstring1) (type: string), upper(cstring1) (type: string), upper(cstring1) (type: string), length(cstring1) (type: int), trim(cstring1) (type: string), ltrim(cstring1) (type: string), rtrim(cstring1) (type: string), concat(cstring1, 
cstring2) (type: string), concat('>', cstring1) (type: string), concat(cstring1, '<') (type: string), concat(substr(cstring1, 1, 2), substr(cstring2, 1, 2)) (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1024 Data size: 2265088 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1024 Data size: 2265088 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: (((cbigint % 237) = 0) and (length(substr(cstring1, 1, 2)) <= 2) and (cstring1 like '%')) (type: boolean) - Select Operator - expressions: substr(cstring1, 1, 2) (type: string), substr(cstring1, 2) (type: string), lower(cstring1) (type: string), upper(cstring1) (type: string), upper(cstring1) (type: string), length(cstring1) (type: int), trim(cstring1) (type: string), ltrim(cstring1) (type: string), rtrim(cstring1) (type: string), concat(cstring1, cstring2) (type: string), concat('>', cstring1) (type: string), concat(cstring1, '<') (type: string), concat(substr(cstring1, 1, 2), substr(cstring2, 1, 2)) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - ListSink + ListSink PREHOOK: query: select substr(cstring1, 1, 2) diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out index 5193d20..63fff72 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out @@ -17,10 +17,10 @@ POSTHOOK: query: INSERT INTO TABLE test VALUES ('0001-01-01 00:00:00.000000000') POSTHOOK: type: QUERY POSTHOOK: Output: default@test POSTHOOK: Lineage: test.ts EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ts FROM test PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ts FROM test POSTHOOK: type: QUERY Plan optimized by CBO. @@ -46,10 +46,10 @@ POSTHOOK: Input: default@test #### A masked pattern was here #### 0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test POSTHOOK: type: QUERY Plan optimized by CBO. 
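Everything in these q.out hunks is golden-file churn from one mechanical change: the tests now run the new EXPLAIN VECTORIZATION forms instead of plain EXPLAIN / EXPLAIN EXTENDED, so every plan gains a PLAN VECTORIZATION header plus per-vertex Map Vectorization / Reduce Vectorization annotations. A minimal HiveQL sketch of the two lighter forms exercised by these files (queries abbreviated from the tests above; the full grammar of EXPLAIN VECTORIZATION modifiers is not shown by this patch):

    -- Plan-level gate named in "enabledConditionsMet: [hive.vectorized.execution.enabled IS true]":
    SET hive.vectorized.execution.enabled=true;

    -- Summary form (vectorized_string_funcs.q): adds PLAN VECTORIZATION and per-vertex blocks.
    EXPLAIN VECTORIZATION
    SELECT substr(cstring1, 1, 2), lower(cstring1), upper(cstring1)
    FROM alltypesorc
    WHERE cstring1 LIKE '%';

    -- EXPRESSION form (vectorized_shufflejoin.q): additionally prints the generated vector
    -- operators and expressions, e.g. VectorFilterOperator and SelectColumnIsNotNull(col 2).
    EXPLAIN VECTORIZATION EXPRESSION
    SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint), MIN(t1.cint), AVG(t1.cint + t2.cint)
    FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint
    ORDER BY CNT;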
@@ -85,10 +85,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test #### A masked pattern was here #### 0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 3652060 23:59:59.999999999 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ts FROM test PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ts FROM test POSTHOOK: type: QUERY Plan optimized by CBO. @@ -114,10 +114,10 @@ POSTHOOK: Input: default@test #### A masked pattern was here #### 0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test POSTHOOK: type: QUERY Plan optimized by CBO. diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out index 0044841..d2f95f0 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out @@ -63,7 +63,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@alltypesorc_wrong POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -76,7 +76,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -89,6 +89,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -106,26 +110,61 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: VectorUDFUnixTimeStampTimestamp(col 0) -> 2:long, VectorUDFYearTimestamp(col 0, field YEAR) -> 3:long, VectorUDFMonthTimestamp(col 0, field MONTH) -> 4:long, VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 5:long, VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 6:long, VectorUDFWeekOfYearTimestamp(col 0, field WEEK_OF_YEAR) -> 7:long, VectorUDFHourTimestamp(col 0, field HOUR_OF_DAY) -> 8:long, VectorUDFMinuteTimestamp(col 0, field MINUTE) -> 9:long, VectorUDFSecondTimestamp(col 0, field SECOND) -> 10:long Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE 
Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -208,7 +247,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -221,7 +260,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -234,6 +273,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -251,26 +294,61 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: 
int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: VectorUDFUnixTimeStampString(col 1) -> 2:long, VectorUDFYearString(col 1, fieldStart 0, fieldLength 4) -> 3:long, VectorUDFMonthString(col 1, fieldStart 5, fieldLength 2) -> 4:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 5:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 6:long, VectorUDFWeekOfYearString(col 1) -> 7:long, VectorUDFHourString(col 1, fieldStart 11, fieldLength 2) -> 8:long, VectorUDFMinuteString(col 1, fieldStart 14, fieldLength 2) -> 9:long, VectorUDFSecondString(col 1, fieldStart 17, fieldLength 2) -> 10:long Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -353,7 +431,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: 
EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -366,7 +444,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -379,6 +457,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -396,26 +478,61 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(stimestamp1)) (type: boolean), (month(ctimestamp1) = month(stimestamp1)) (type: boolean), (day(ctimestamp1) = day(stimestamp1)) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(stimestamp1)) (type: boolean), (hour(ctimestamp1) = hour(stimestamp1)) (type: boolean), (minute(ctimestamp1) = minute(stimestamp1)) (type: boolean), (second(ctimestamp1) = second(stimestamp1)) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 6, 7, 8, 9, 10, 11, 12] + selectExpressions: LongColEqualLongColumn(col 2, col 3)(children: VectorUDFUnixTimeStampTimestamp(col 0) -> 2:long, VectorUDFUnixTimeStampString(col 1) -> 3:long) -> 4:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFYearTimestamp(col 0, field YEAR) -> 2:long, VectorUDFYearString(col 1, fieldStart 0, fieldLength 4) -> 3:long) -> 5:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMonthTimestamp(col 0, field MONTH) -> 2:long, VectorUDFMonthString(col 1, fieldStart 5, fieldLength 2) -> 3:long) -> 6:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 3:long) -> 7:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 3:long) -> 8:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFWeekOfYearTimestamp(col 0, field WEEK_OF_YEAR) -> 2:long, VectorUDFWeekOfYearString(col 1) -> 3:long) -> 9:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFHourTimestamp(col 0, field HOUR_OF_DAY) -> 2:long, VectorUDFHourString(col 1, fieldStart 11, fieldLength 2) -> 3:long) -> 10:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMinuteTimestamp(col 0, field MINUTE) -> 2:long, VectorUDFMinuteString(col 1, fieldStart 14, fieldLength 2) -> 3:long) -> 11:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFSecondTimestamp(col 0, field SECOND) -> 2:long, VectorUDFSecondString(col 1, fieldStart 17, fieldLength 2) -> 3:long) 
-> 12:long Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -498,7 +615,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -511,7 +628,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_wrong ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -524,6 +641,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_wrong ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -541,26 +662,61 @@ STAGE PLANS: TableScan alias: alltypesorc_wrong Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] 
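Note: the selectExpressions in the plans above encode the fixed-offset layout that the vectorized string date UDFs assume for "yyyy-MM-dd HH:mm:ss" values: year at (fieldStart 0, fieldLength 4), month at (5, 2), day of month at (8, 2), hour at (11, 2), minute at (14, 2), and second at (17, 2), while VectorUDFUnixTimeStampString and VectorUDFWeekOfYearString take no offsets because they must parse the whole value. The following is a minimal, self-contained sketch of that kind of fixed-offset digit parsing; it is illustrative only, not the actual vectorized UDF code, and the names parseField/FixedOffsetFieldSketch are invented for this sketch.

public class FixedOffsetFieldSketch {
  // (fieldStart, fieldLength) pairs as printed in the explain output:
  // year (0,4), month (5,2), dayOfMonth (8,2), hour (11,2),
  // minute (14,2), second (17,2).
  static int parseField(byte[] bytes, int start, int length) {
    if (start + length > bytes.length) {
      return -1; // value too short, e.g. the malformed rows in alltypesorc_wrong
    }
    int value = 0;
    for (int i = start; i < start + length; i++) {
      int d = bytes[i] - '0';
      if (d < 0 || d > 9) {
        return -1; // non-digit: the wrong-format test expects NULL results here
      }
      value = value * 10 + d;
    }
    return value;
  }

  public static void main(String[] args) {
    byte[] ts = "2013-03-01 09:11:58"
        .getBytes(java.nio.charset.StandardCharsets.UTF_8);
    System.out.println(parseField(ts, 0, 4));  // 2013 (year)
    System.out.println(parseField(ts, 5, 2));  // 3    (month)
    System.out.println(parseField(ts, 8, 2));  // 1    (day of month)
    System.out.println(parseField(ts, 17, 2)); // 58   (second)
  }
}

This is why the alltypesorc_wrong plan vectorizes identically to the well-formed case: the offsets are fixed at compile time, and malformed input only surfaces at runtime as NULLs in the result rows.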
Select Operator expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9] + selectExpressions: VectorUDFUnixTimeStampString(col 0) -> 1:long, VectorUDFYearString(col 0, fieldStart 0, fieldLength 4) -> 2:long, VectorUDFMonthString(col 0, fieldStart 5, fieldLength 2) -> 3:long, VectorUDFDayOfMonthString(col 0, fieldStart 8, fieldLength 2) -> 4:long, VectorUDFDayOfMonthString(col 0, fieldStart 8, fieldLength 2) -> 5:long, VectorUDFWeekOfYearString(col 0) -> 6:long, VectorUDFHourString(col 0, fieldStart 11, fieldLength 2) -> 7:long, VectorUDFMinuteString(col 0, fieldStart 14, fieldLength 2) -> 8:long, VectorUDFSecondString(col 0, fieldStart 17, fieldLength 2) -> 9:long Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -606,20 +762,24 @@ POSTHOOK: Input: default@alltypesorc_wrong NULL NULL NULL NULL NULL 
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count(*) FROM alltypesorc_string PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count(*) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -637,31 +797,73 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: ctimestamp1 (type: timestamp) outputColumnNames: ctimestamp1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() + Group By Vectorization: + aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 0) -> timestamp, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 1) -> timestamp, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: 
Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -693,14 +895,18 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### NULL NULL 0 40 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -733,20 +939,47 @@ STAGE PLANS: value expressions: _col0 (type: double) Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Aggregation Function expression for GROUPBY operator: Vectorization of aggreation should have succeeded org.apache.hadoop.hive.ql.metadata.HiveException: Vector aggregate not implemented: "sum" for type: "TIMESTAMP (UDAF evaluator mode = PARTIAL1) + vectorized: false Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 0) -> double + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: round(_col0, 3) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 0, decimalPlaces 3) -> 1:double Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -772,7 +1005,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -783,7 +1016,7 @@ PREHOOK: query: EXPLAIN SELECT round(stddev_samp(ctimestamp1), 3) FROM alltypesorc_string PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 
8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -794,6 +1027,10 @@ POSTHOOK: query: EXPLAIN SELECT round(stddev_samp(ctimestamp1), 3) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -811,12 +1048,26 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: ctimestamp1 (type: timestamp) outputColumnNames: ctimestamp1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1) + Group By Vectorization: + aggregators: VectorUDAFAvgTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarSampTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdSampTimestamp(col 0) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE @@ -826,8 +1077,21 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for 
GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out index dbfe45e..9053c9b 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select cast (ctinyint as timestamp) @@ -16,7 +16,7 @@ from alltypesorc where cbigint % 250 = 0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select cast (ctinyint as timestamp) @@ -34,22 +34,69 @@ from alltypesorc where cbigint % 250 = 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 1684250 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 3, val 250) -> 12:long) -> boolean + predicate: ((cbigint % 250) = 0) (type: boolean) + Statistics: Num rows: 6144 Data size: 842180 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [14, 16, 18, 20, 21, 22, 24, 26, 8, 27, 29] + selectExpressions: CastMillisecondsLongToTimestamp(col 0) -> 14:timestamp, CastMillisecondsLongToTimestamp(col 1) -> 16:timestamp, CastMillisecondsLongToTimestamp(col 2) -> 18:timestamp, CastMillisecondsLongToTimestamp(col 3) -> 20:timestamp, CastDoubleToTimestamp(col 4) -> 21:timestamp, CastDoubleToTimestamp(col 5) -> 22:timestamp, CastMillisecondsLongToTimestamp(col 10) -> 24:timestamp, CastMillisecondsLongToTimestamp(col 12)(children: LongColMultiplyLongScalar(col 3, val 0) -> 12:long) -> 26:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 27:timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6, start 0, length 1) -> 28:string) -> 29:timestamp + 
Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: ((cbigint % 250) = 0) (type: boolean) - Select Operator - expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - ListSink + ListSink PREHOOK: query: select @@ -115,7 +162,7 @@ POSTHOOK: Input: default@alltypesorc 1969-12-31 15:59:59.95 1969-12-31 15:59:52.804 NULL 1969-12-19 17:33:32.75 1969-12-31 15:59:10 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 15:59:54.733 NULL NULL 1969-12-31 16:00:00.011 NULL 1969-12-30 22:03:04.018 1970-01-21 12:50:53.75 1969-12-31 16:00:11 NULL 1969-12-31 16:00:00.001 1969-12-31 16:00:00 1969-12-31 16:00:02.351 NULL NULL 1969-12-31 16:00:00.011 NULL 1969-12-27 18:49:09.583 1970-01-14 22:35:27 1969-12-31 16:00:11 NULL 1969-12-31 16:00:00.001 1969-12-31 16:00:00 1969-12-31 16:00:02.351 NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select cast (ctinyint as timestamp) @@ -133,7 +180,7 @@ from alltypesorc where cbigint % 250 = 0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select cast (ctinyint as timestamp) @@ -151,22 +198,69 @@ from alltypesorc where cbigint % 250 = 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 1684250 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 3, val 250) -> 12:long) -> boolean + predicate: ((cbigint % 250) = 0) (type: boolean) + Statistics: Num rows: 
6144 Data size: 842180 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19, 20, 8, 21, 23] + selectExpressions: CastLongToTimestamp(col 0) -> 13:timestamp, CastLongToTimestamp(col 1) -> 14:timestamp, CastLongToTimestamp(col 2) -> 15:timestamp, CastLongToTimestamp(col 3) -> 16:timestamp, CastDoubleToTimestamp(col 4) -> 17:timestamp, CastDoubleToTimestamp(col 5) -> 18:timestamp, CastLongToTimestamp(col 10) -> 19:timestamp, CastLongToTimestamp(col 12)(children: LongColMultiplyLongScalar(col 3, val 0) -> 12:long) -> 20:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 21:timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6, start 0, length 1) -> 22:string) -> 23:timestamp + Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: ((cbigint % 250) = 0) (type: boolean) - Select Operator - expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - ListSink + ListSink PREHOOK: query: select diff --git ql/src/test/results/clientpositive/llap_text.q.out ql/src/test/results/clientpositive/llap_text.q.out index acc41bf..8b059c8 100644 --- ql/src/test/results/clientpositive/llap_text.q.out +++ ql/src/test/results/clientpositive/llap_text.q.out @@ -216,6 +216,20 @@ POSTHOOK: query: select t, s, ts from text_llap2 
order by t, s, ts limit 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@text_llap2 #### A masked pattern was here #### +-2 alice carson 2013-03-01 09:11:58.703074 +-2 alice nixon 2013-03-01 09:11:58.703321 +-2 alice underhill 2013-03-01 09:11:58.703122 +-2 alice underhill 2013-03-01 09:11:58.703127 +-2 alice xylophone 2013-03-01 09:11:58.703105 +-2 bob falkner 2013-03-01 09:11:58.703071 +-2 bob king 2013-03-01 09:11:58.703236 +-2 bob ovid 2013-03-01 09:11:58.703285 +-2 bob van buren 2013-03-01 09:11:58.703218 +-2 bob xylophone 2013-03-01 09:11:58.703219 +-2 calvin xylophone 2013-03-01 09:11:58.703083 +-2 david falkner 2013-03-01 09:11:58.703254 +-2 david laertes 2013-03-01 09:11:58.703076 +-2 david miller 2013-03-01 09:11:58.703238 -3 alice allen 2013-03-01 09:11:58.703323 -3 alice davidson 2013-03-01 09:11:58.703226 -3 alice falkner 2013-03-01 09:11:58.703304 @@ -302,20 +316,6 @@ POSTHOOK: Input: default@text_llap2 -3 yuri xylophone 2013-03-01 09:11:58.703258 -3 zach thompson 2013-03-01 09:11:58.703252 -3 zach young 2013-03-01 09:11:58.703191 --2 alice carson 2013-03-01 09:11:58.703074 --2 alice nixon 2013-03-01 09:11:58.703321 --2 alice underhill 2013-03-01 09:11:58.703122 --2 alice underhill 2013-03-01 09:11:58.703127 --2 alice xylophone 2013-03-01 09:11:58.703105 --2 bob falkner 2013-03-01 09:11:58.703071 --2 bob king 2013-03-01 09:11:58.703236 --2 bob ovid 2013-03-01 09:11:58.703285 --2 bob van buren 2013-03-01 09:11:58.703218 --2 bob xylophone 2013-03-01 09:11:58.703219 --2 calvin xylophone 2013-03-01 09:11:58.703083 --2 david falkner 2013-03-01 09:11:58.703254 --2 david laertes 2013-03-01 09:11:58.703076 --2 david miller 2013-03-01 09:11:58.703238 PREHOOK: query: select * from text_llap2 order by t, s, ts limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@text_llap2 @@ -324,106 +324,106 @@ POSTHOOK: query: select * from text_llap2 order by t, s, ts limit 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@text_llap2 #### A masked pattern was here #### --3 400 65557 4294967503 76.31 29.44 false alice allen 2013-03-01 09:11:58.703323 19 debate --3 384 65676 4294967453 71.97 31.52 false alice davidson 2013-03-01 09:11:58.703226 14 xylophone band --3 280 65597 4294967377 18.44 49.8 true alice falkner 2013-03-01 09:11:58.703304 74 zync studies --3 455 65570 4294967304 2.48 30.76 false alice king 2013-03-01 09:11:58.70314 42 forestry --3 458 65563 4294967315 62.77 41.5 false alice king 2013-03-01 09:11:58.703247 3 mathematics --3 485 65661 4294967441 26.21 16.19 false alice xylophone 2013-03-01 09:11:58.703129 97 topology --3 339 65671 4294967311 8.37 15.98 true bob ellison 2013-03-01 09:11:58.703261 14 linguistics +-2 305 65767 4294967529 76.54 4.72 true calvin xylophone 2013-03-01 09:11:58.703083 69 quiet hour +-2 331 65707 4294967335 67.12 13.51 false bob ovid 2013-03-01 09:11:58.703285 62 joggying +-2 373 65548 4294967423 16.98 43.6 true alice nixon 2013-03-01 09:11:58.703321 53 debate +-2 378 65553 4294967461 9.81 10.36 true bob king 2013-03-01 09:11:58.703236 91 opthamology +-2 389 65706 4294967488 26.68 17.93 false alice underhill 2013-03-01 09:11:58.703122 87 forestry +-2 389 65738 4294967520 99.45 26.26 true bob falkner 2013-03-01 09:11:58.703071 17 nap time +-2 393 65715 4294967305 48.3 1.85 true alice xylophone 2013-03-01 09:11:58.703105 30 values clariffication +-2 406 65582 4294967311 20.94 35.74 false bob van buren 2013-03-01 09:11:58.703218 25 opthamology +-2 406 65762 4294967443 1.79 33.42 false david falkner 2013-03-01 09:11:58.703254 58 opthamology +-2 407 65612 
4294967318 25.48 41.56 true david laertes 2013-03-01 09:11:58.703076 40 forestry +-2 427 65666 4294967465 19.69 33.24 true bob xylophone 2013-03-01 09:11:58.703219 33 joggying +-2 446 65790 4294967302 6.49 10.81 false alice underhill 2013-03-01 09:11:58.703127 44 undecided +-2 450 65727 4294967487 94.57 30.4 false david miller 2013-03-01 09:11:58.703238 40 religion +-2 473 65565 4294967320 87.78 12.26 true alice carson 2013-03-01 09:11:58.703074 90 xylophone band -3 260 65595 4294967545 59.07 6.75 false bob falkner 2013-03-01 09:11:58.70328 37 chemistry --3 454 65733 4294967544 73.83 18.42 false bob ichabod 2013-03-01 09:11:58.70324 96 debate --3 374 65731 4294967388 22.35 22.71 true bob johnson 2013-03-01 09:11:58.703204 80 biology -3 264 65776 4294967398 20.95 5.97 false bob polk 2013-03-01 09:11:58.703128 93 joggying --3 465 65735 4294967298 72.3 22.58 false bob underhill 2013-03-01 09:11:58.703176 81 joggying --3 454 65627 4294967481 17.6 35.72 false bob underhill 2013-03-01 09:11:58.703188 67 religion --3 433 65654 4294967455 6.83 5.33 false bob van buren 2013-03-01 09:11:58.703199 29 yard duty --3 431 65635 4294967500 29.06 0.34 false calvin ichabod 2013-03-01 09:11:58.703213 29 undecided --3 280 65548 4294967350 52.3 33.06 true calvin white 2013-03-01 09:11:58.703295 30 quiet hour +-3 266 65736 4294967397 19.94 10.01 false quinn ellison 2013-03-01 09:11:58.703232 89 forestry +-3 268 65710 4294967448 82.74 12.48 true holly polk 2013-03-01 09:11:58.703273 15 undecided -3 270 65702 4294967512 38.05 1.07 true david carson 2013-03-01 09:11:58.703136 28 philosophy --3 469 65698 4294967357 47.51 49.22 true david falkner 2013-03-01 09:11:58.703305 78 joggying --3 485 65684 4294967483 11.83 8.04 false david garcia 2013-03-01 09:11:58.70319 63 wind surfing --3 408 65667 4294967509 81.68 45.9 true david hernandez 2013-03-01 09:11:58.703252 52 topology +-3 275 65543 4294967522 74.92 17.29 false mike king 2013-03-01 09:11:58.703214 53 opthamology +-3 275 65575 4294967441 38.22 2.43 true sarah xylophone 2013-03-01 09:11:58.703112 93 wind surfing +-3 275 65622 4294967302 71.78 8.49 false wendy robinson 2013-03-01 09:11:58.703294 95 undecided +-3 279 65661 4294967536 25.5 0.02 false wendy quirinius 2013-03-01 09:11:58.703266 75 undecided +-3 280 65548 4294967350 52.3 33.06 true calvin white 2013-03-01 09:11:58.703295 30 quiet hour +-3 280 65597 4294967377 18.44 49.8 true alice falkner 2013-03-01 09:11:58.703304 74 zync studies +-3 280 65769 4294967324 28.78 35.05 true xavier ovid 2013-03-01 09:11:58.703148 43 kindergarten +-3 284 65566 4294967400 62.81 39.1 false jessica white 2013-03-01 09:11:58.703199 70 opthamology +-3 286 65573 4294967493 18.27 23.71 false zach young 2013-03-01 09:11:58.703191 22 kindergarten +-3 289 65757 4294967528 56.2 44.24 true luke ichabod 2013-03-01 09:11:58.703294 7 yard duty -3 298 65720 4294967305 34.6 39.7 false ethan steinbeck 2013-03-01 09:11:58.703079 35 kindergarten +-3 299 65763 4294967542 85.96 10.45 true jessica miller 2013-03-01 09:11:58.703245 26 mathematics +-3 303 65617 4294967473 10.26 1.41 false ulysses quirinius 2013-03-01 09:11:58.703189 84 chemistry +-3 307 65634 4294967546 90.3 28.44 false irene underhill 2013-03-01 09:11:58.703298 85 forestry +-3 311 65569 4294967460 3.82 35.45 false luke garcia 2013-03-01 09:11:58.703076 93 chemistry +-3 313 65540 4294967316 25.67 39.88 false ulysses robinson 2013-03-01 09:11:58.703227 61 religion +-3 314 65670 4294967330 13.67 34.86 false wendy xylophone 2013-03-01 09:11:58.703191 85 mathematics +-3 315 65671 
4294967412 94.22 25.96 true oscar johnson 2013-03-01 09:11:58.703133 89 nap time +-3 316 65696 4294967445 22.0 43.41 false priscilla laertes 2013-03-01 09:11:58.70325 51 values clariffication +-3 318 65553 4294967452 9.86 32.77 false holly underhill 2013-03-01 09:11:58.703219 47 wind surfing +-3 320 65644 4294967434 84.39 48.0 false sarah robinson 2013-03-01 09:11:58.703288 72 wind surfing +-3 324 65773 4294967296 11.07 25.95 true oscar miller 2013-03-01 09:11:58.70332 57 opthamology +-3 333 65562 4294967359 22.34 35.58 false ulysses steinbeck 2013-03-01 09:11:58.703259 87 xylophone band +-3 335 65696 4294967333 72.26 9.66 true nick nixon 2013-03-01 09:11:58.703083 85 philosophy +-3 337 65629 4294967521 55.59 6.54 true luke king 2013-03-01 09:11:58.703207 59 industrial engineering +-3 337 65658 4294967361 43.4 12.05 false victor allen 2013-03-01 09:11:58.703155 45 topology +-3 339 65671 4294967311 8.37 15.98 true bob ellison 2013-03-01 09:11:58.703261 14 linguistics -3 339 65737 4294967453 14.23 26.66 true ethan underhill 2013-03-01 09:11:58.703138 95 xylophone band +-3 343 65783 4294967378 7.1 18.16 true ulysses carson 2013-03-01 09:11:58.703253 97 mathematics +-3 344 65733 4294967363 0.56 11.96 true rachel thompson 2013-03-01 09:11:58.703276 88 wind surfing +-3 344 65756 4294967378 52.13 18.95 true victor thompson 2013-03-01 09:11:58.703299 81 topology +-3 346 65752 4294967298 56.05 34.03 false tom polk 2013-03-01 09:11:58.703217 49 zync studies +-3 350 65566 4294967434 23.22 6.68 true nick robinson 2013-03-01 09:11:58.703147 24 education +-3 362 65712 4294967325 43.73 48.74 false oscar garcia 2013-03-01 09:11:58.703282 30 chemistry +-3 374 65731 4294967388 22.35 22.71 true bob johnson 2013-03-01 09:11:58.703204 80 biology -3 376 65548 4294967431 96.78 43.23 false fred ellison 2013-03-01 09:11:58.703233 75 education --3 498 65751 4294967331 80.65 0.28 true gabriella brown 2013-03-01 09:11:58.703288 61 opthamology --3 505 65565 4294967407 68.73 4.65 true holly nixon 2013-03-01 09:11:58.703262 15 debate --3 268 65710 4294967448 82.74 12.48 true holly polk 2013-03-01 09:11:58.703273 15 undecided +-3 376 65766 4294967326 97.88 5.58 true sarah zipper 2013-03-01 09:11:58.703289 49 study skills +-3 381 65640 4294967379 59.34 7.97 false ulysses ellison 2013-03-01 09:11:58.703197 32 undecided -3 384 65613 4294967470 63.49 45.85 false holly steinbeck 2013-03-01 09:11:58.703242 54 chemistry +-3 384 65676 4294967453 71.97 31.52 false alice davidson 2013-03-01 09:11:58.703226 14 xylophone band +-3 386 65611 4294967331 58.81 22.43 true sarah miller 2013-03-01 09:11:58.70316 75 mathematics +-3 386 65716 4294967496 12.12 2.37 false zach thompson 2013-03-01 09:11:58.703252 16 linguistics -3 387 65550 4294967355 84.75 22.75 true holly thompson 2013-03-01 09:11:58.703073 52 biology --3 318 65553 4294967452 9.86 32.77 false holly underhill 2013-03-01 09:11:58.703219 47 wind surfing --3 458 65696 4294967418 45.24 8.49 false irene ellison 2013-03-01 09:11:58.703092 54 american history --3 307 65634 4294967546 90.3 28.44 false irene underhill 2013-03-01 09:11:58.703298 85 forestry +-3 400 65557 4294967503 76.31 29.44 false alice allen 2013-03-01 09:11:58.703323 19 debate +-3 408 65667 4294967509 81.68 45.9 true david hernandez 2013-03-01 09:11:58.703252 52 topology +-3 414 65608 4294967338 81.39 49.09 true tom steinbeck 2013-03-01 09:11:58.703251 11 xylophone band +-3 415 65571 4294967536 61.81 24.24 true victor robinson 2013-03-01 09:11:58.703305 23 american history +-3 423 65646 4294967378 63.19 34.04 false 
priscilla quirinius 2013-03-01 09:11:58.703228 35 xylophone band +-3 430 65667 4294967469 65.5 40.46 true yuri xylophone 2013-03-01 09:11:58.703258 31 american history +-3 431 65635 4294967500 29.06 0.34 false calvin ichabod 2013-03-01 09:11:58.703213 29 undecided +-3 432 65646 4294967492 0.83 27.18 true oscar davidson 2013-03-01 09:11:58.703071 56 linguistics +-3 433 65654 4294967455 6.83 5.33 false bob van buren 2013-03-01 09:11:58.703199 29 yard duty +-3 438 65618 4294967398 62.39 4.62 false victor xylophone 2013-03-01 09:11:58.703135 88 values clariffication +-3 447 65755 4294967320 43.69 20.03 false victor hernandez 2013-03-01 09:11:58.703176 14 forestry +-3 448 65610 4294967314 81.97 31.11 true mike xylophone 2013-03-01 09:11:58.703308 79 opthamology +-3 451 65696 4294967532 6.8 40.07 false luke young 2013-03-01 09:11:58.703182 27 biology +-3 454 65627 4294967481 17.6 35.72 false bob underhill 2013-03-01 09:11:58.703188 67 religion +-3 454 65705 4294967468 62.12 14.32 true mike white 2013-03-01 09:11:58.703087 40 joggying +-3 454 65733 4294967544 73.83 18.42 false bob ichabod 2013-03-01 09:11:58.70324 96 debate +-3 455 65570 4294967304 2.48 30.76 false alice king 2013-03-01 09:11:58.70314 42 forestry +-3 458 65563 4294967315 62.77 41.5 false alice king 2013-03-01 09:11:58.703247 3 mathematics -3 458 65679 4294967331 64.29 43.8 true irene young 2013-03-01 09:11:58.703084 3 american history --3 494 65589 4294967369 48.09 14.4 false jessica johnson 2013-03-01 09:11:58.703319 79 nap time +-3 458 65696 4294967418 45.24 8.49 false irene ellison 2013-03-01 09:11:58.703092 54 american history -3 459 65644 4294967456 92.71 0.08 false jessica king 2013-03-01 09:11:58.703279 53 joggying --3 299 65763 4294967542 85.96 10.45 true jessica miller 2013-03-01 09:11:58.703245 26 mathematics --3 284 65566 4294967400 62.81 39.1 false jessica white 2013-03-01 09:11:58.703199 70 opthamology --3 469 65577 4294967451 88.78 32.96 true katie ichabod 2013-03-01 09:11:58.703139 69 undecided --3 311 65569 4294967460 3.82 35.45 false luke garcia 2013-03-01 09:11:58.703076 93 chemistry --3 289 65757 4294967528 56.2 44.24 true luke ichabod 2013-03-01 09:11:58.703294 7 yard duty --3 337 65629 4294967521 55.59 6.54 true luke king 2013-03-01 09:11:58.703207 59 industrial engineering --3 451 65696 4294967532 6.8 40.07 false luke young 2013-03-01 09:11:58.703182 27 biology -3 465 65551 4294967457 83.39 46.64 true mike allen 2013-03-01 09:11:58.703292 53 values clariffication --3 275 65543 4294967522 74.92 17.29 false mike king 2013-03-01 09:11:58.703214 53 opthamology --3 500 65704 4294967480 2.26 28.79 true mike polk 2013-03-01 09:11:58.70319 4 nap time --3 454 65705 4294967468 62.12 14.32 true mike white 2013-03-01 09:11:58.703087 40 joggying --3 448 65610 4294967314 81.97 31.11 true mike xylophone 2013-03-01 09:11:58.703308 79 opthamology --3 335 65696 4294967333 72.26 9.66 true nick nixon 2013-03-01 09:11:58.703083 85 philosophy --3 350 65566 4294967434 23.22 6.68 true nick robinson 2013-03-01 09:11:58.703147 24 education --3 432 65646 4294967492 0.83 27.18 true oscar davidson 2013-03-01 09:11:58.703071 56 linguistics --3 362 65712 4294967325 43.73 48.74 false oscar garcia 2013-03-01 09:11:58.703282 30 chemistry +-3 465 65735 4294967298 72.3 22.58 false bob underhill 2013-03-01 09:11:58.703176 81 joggying +-3 467 65575 4294967437 81.64 23.53 true tom hernandez 2013-03-01 09:11:58.703188 33 study skills +-3 469 65577 4294967451 88.78 32.96 true katie ichabod 2013-03-01 09:11:58.703139 69 undecided +-3 469 65698 
4294967357 47.51 49.22 true david falkner 2013-03-01 09:11:58.703305 78 joggying -3 469 65752 4294967350 55.41 32.11 true oscar johnson 2013-03-01 09:11:58.70311 47 philosophy --3 315 65671 4294967412 94.22 25.96 true oscar johnson 2013-03-01 09:11:58.703133 89 nap time --3 324 65773 4294967296 11.07 25.95 true oscar miller 2013-03-01 09:11:58.70332 57 opthamology --3 316 65696 4294967445 22.0 43.41 false priscilla laertes 2013-03-01 09:11:58.70325 51 values clariffication --3 423 65646 4294967378 63.19 34.04 false priscilla quirinius 2013-03-01 09:11:58.703228 35 xylophone band +-3 477 65785 4294967464 97.51 10.84 true tom hernandez 2013-03-01 09:11:58.703108 7 history +-3 485 65661 4294967441 26.21 16.19 false alice xylophone 2013-03-01 09:11:58.703129 97 topology -3 485 65669 4294967428 21.34 13.07 false priscilla zipper 2013-03-01 09:11:58.703321 28 quiet hour --3 266 65736 4294967397 19.94 10.01 false quinn ellison 2013-03-01 09:11:58.703232 89 forestry +-3 485 65684 4294967483 11.83 8.04 false david garcia 2013-03-01 09:11:58.70319 63 wind surfing +-3 493 65662 4294967482 28.75 30.21 false xavier garcia 2013-03-01 09:11:58.703194 5 education +-3 494 65589 4294967369 48.09 14.4 false jessica johnson 2013-03-01 09:11:58.703319 79 nap time +-3 498 65751 4294967331 80.65 0.28 true gabriella brown 2013-03-01 09:11:58.703288 61 opthamology +-3 500 65704 4294967480 2.26 28.79 true mike polk 2013-03-01 09:11:58.70319 4 nap time +-3 505 65565 4294967407 68.73 4.65 true holly nixon 2013-03-01 09:11:58.703262 15 debate -3 507 65671 4294967305 60.28 41.5 false quinn polk 2013-03-01 09:11:58.703244 77 industrial engineering -3 507 65728 4294967525 81.95 47.14 true rachel davidson 2013-03-01 09:11:58.703316 31 study skills --3 344 65733 4294967363 0.56 11.96 true rachel thompson 2013-03-01 09:11:58.703276 88 wind surfing --3 386 65611 4294967331 58.81 22.43 true sarah miller 2013-03-01 09:11:58.70316 75 mathematics --3 320 65644 4294967434 84.39 48.0 false sarah robinson 2013-03-01 09:11:58.703288 72 wind surfing --3 275 65575 4294967441 38.22 2.43 true sarah xylophone 2013-03-01 09:11:58.703112 93 wind surfing --3 376 65766 4294967326 97.88 5.58 true sarah zipper 2013-03-01 09:11:58.703289 49 study skills --3 477 65785 4294967464 97.51 10.84 true tom hernandez 2013-03-01 09:11:58.703108 7 history --3 467 65575 4294967437 81.64 23.53 true tom hernandez 2013-03-01 09:11:58.703188 33 study skills --3 346 65752 4294967298 56.05 34.03 false tom polk 2013-03-01 09:11:58.703217 49 zync studies --3 414 65608 4294967338 81.39 49.09 true tom steinbeck 2013-03-01 09:11:58.703251 11 xylophone band --3 343 65783 4294967378 7.1 18.16 true ulysses carson 2013-03-01 09:11:58.703253 97 mathematics --3 381 65640 4294967379 59.34 7.97 false ulysses ellison 2013-03-01 09:11:58.703197 32 undecided --3 303 65617 4294967473 10.26 1.41 false ulysses quirinius 2013-03-01 09:11:58.703189 84 chemistry --3 313 65540 4294967316 25.67 39.88 false ulysses robinson 2013-03-01 09:11:58.703227 61 religion --3 333 65562 4294967359 22.34 35.58 false ulysses steinbeck 2013-03-01 09:11:58.703259 87 xylophone band --3 337 65658 4294967361 43.4 12.05 false victor allen 2013-03-01 09:11:58.703155 45 topology --3 447 65755 4294967320 43.69 20.03 false victor hernandez 2013-03-01 09:11:58.703176 14 forestry --3 415 65571 4294967536 61.81 24.24 true victor robinson 2013-03-01 09:11:58.703305 23 american history --3 344 65756 4294967378 52.13 18.95 true victor thompson 2013-03-01 09:11:58.703299 81 topology --3 438 65618 4294967398 62.39 
4.62 false victor xylophone 2013-03-01 09:11:58.703135 88 values clariffication --3 279 65661 4294967536 25.5 0.02 false wendy quirinius 2013-03-01 09:11:58.703266 75 undecided --3 275 65622 4294967302 71.78 8.49 false wendy robinson 2013-03-01 09:11:58.703294 95 undecided --3 314 65670 4294967330 13.67 34.86 false wendy xylophone 2013-03-01 09:11:58.703191 85 mathematics --3 493 65662 4294967482 28.75 30.21 false xavier garcia 2013-03-01 09:11:58.703194 5 education --3 280 65769 4294967324 28.78 35.05 true xavier ovid 2013-03-01 09:11:58.703148 43 kindergarten --3 430 65667 4294967469 65.5 40.46 true yuri xylophone 2013-03-01 09:11:58.703258 31 american history --3 386 65716 4294967496 12.12 2.37 false zach thompson 2013-03-01 09:11:58.703252 16 linguistics --3 286 65573 4294967493 18.27 23.71 false zach young 2013-03-01 09:11:58.703191 22 kindergarten --2 473 65565 4294967320 87.78 12.26 true alice carson 2013-03-01 09:11:58.703074 90 xylophone band --2 373 65548 4294967423 16.98 43.6 true alice nixon 2013-03-01 09:11:58.703321 53 debate --2 389 65706 4294967488 26.68 17.93 false alice underhill 2013-03-01 09:11:58.703122 87 forestry --2 446 65790 4294967302 6.49 10.81 false alice underhill 2013-03-01 09:11:58.703127 44 undecided --2 393 65715 4294967305 48.3 1.85 true alice xylophone 2013-03-01 09:11:58.703105 30 values clariffication --2 389 65738 4294967520 99.45 26.26 true bob falkner 2013-03-01 09:11:58.703071 17 nap time --2 378 65553 4294967461 9.81 10.36 true bob king 2013-03-01 09:11:58.703236 91 opthamology --2 331 65707 4294967335 67.12 13.51 false bob ovid 2013-03-01 09:11:58.703285 62 joggying --2 406 65582 4294967311 20.94 35.74 false bob van buren 2013-03-01 09:11:58.703218 25 opthamology --2 427 65666 4294967465 19.69 33.24 true bob xylophone 2013-03-01 09:11:58.703219 33 joggying --2 305 65767 4294967529 76.54 4.72 true calvin xylophone 2013-03-01 09:11:58.703083 69 quiet hour --2 406 65762 4294967443 1.79 33.42 false david falkner 2013-03-01 09:11:58.703254 58 opthamology --2 407 65612 4294967318 25.48 41.56 true david laertes 2013-03-01 09:11:58.703076 40 forestry --2 450 65727 4294967487 94.57 30.4 false david miller 2013-03-01 09:11:58.703238 40 religion PREHOOK: query: select t, f, s from text_llap2 order by t, s, f limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@text_llap2 @@ -432,106 +432,106 @@ POSTHOOK: query: select t, f, s from text_llap2 order by t, s, f limit 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@text_llap2 #### A masked pattern was here #### --3 76.31 alice allen --3 71.97 alice davidson +-2 1.79 david falkner +-2 16.98 alice nixon +-2 19.69 bob xylophone +-2 20.94 bob van buren +-2 25.48 david laertes +-2 26.68 alice underhill +-2 48.3 alice xylophone +-2 6.49 alice underhill +-2 67.12 bob ovid +-2 76.54 calvin xylophone +-2 87.78 alice carson +-2 9.81 bob king +-2 94.57 david miller +-2 99.45 bob falkner +-3 0.56 rachel thompson +-3 0.83 oscar davidson +-3 10.26 ulysses quirinius +-3 11.07 oscar miller +-3 11.83 david garcia +-3 12.12 zach thompson +-3 13.67 wendy xylophone +-3 14.23 ethan underhill +-3 17.6 bob underhill +-3 18.27 zach young -3 18.44 alice falkner +-3 19.94 quinn ellison +-3 2.26 mike polk -3 2.48 alice king --3 62.77 alice king --3 26.21 alice xylophone --3 8.37 bob ellison --3 59.07 bob falkner --3 73.83 bob ichabod --3 22.35 bob johnson -3 20.95 bob polk --3 17.6 bob underhill --3 72.3 bob underhill --3 6.83 bob van buren +-3 21.34 priscilla zipper +-3 22.0 priscilla laertes +-3 22.34 ulysses steinbeck +-3 22.35 
bob johnson +-3 23.22 nick robinson +-3 25.5 wendy quirinius +-3 25.67 ulysses robinson +-3 26.21 alice xylophone +-3 28.75 xavier garcia +-3 28.78 xavier ovid -3 29.06 calvin ichabod --3 52.3 calvin white +-3 3.82 luke garcia +-3 34.6 ethan steinbeck -3 38.05 david carson +-3 38.22 sarah xylophone +-3 43.4 victor allen +-3 43.69 victor hernandez +-3 43.73 oscar garcia +-3 45.24 irene ellison -3 47.51 david falkner --3 11.83 david garcia --3 81.68 david hernandez --3 34.6 ethan steinbeck --3 14.23 ethan underhill --3 96.78 fred ellison --3 80.65 gabriella brown --3 68.73 holly nixon --3 82.74 holly polk --3 63.49 holly steinbeck --3 84.75 holly thompson --3 9.86 holly underhill --3 45.24 irene ellison --3 90.3 irene underhill --3 64.29 irene young -3 48.09 jessica johnson --3 92.71 jessica king --3 85.96 jessica miller --3 62.81 jessica white --3 88.78 katie ichabod --3 3.82 luke garcia --3 56.2 luke ichabod +-3 52.13 victor thompson +-3 52.3 calvin white +-3 55.41 oscar johnson -3 55.59 luke king +-3 56.05 tom polk +-3 56.2 luke ichabod +-3 58.81 sarah miller +-3 59.07 bob falkner +-3 59.34 ulysses ellison -3 6.8 luke young --3 83.39 mike allen --3 74.92 mike king --3 2.26 mike polk +-3 6.83 bob van buren +-3 60.28 quinn polk +-3 61.81 victor robinson -3 62.12 mike white --3 81.97 mike xylophone --3 72.26 nick nixon --3 23.22 nick robinson --3 0.83 oscar davidson --3 43.73 oscar garcia --3 55.41 oscar johnson --3 94.22 oscar johnson --3 11.07 oscar miller --3 22.0 priscilla laertes +-3 62.39 victor xylophone +-3 62.77 alice king +-3 62.81 jessica white -3 63.19 priscilla quirinius --3 21.34 priscilla zipper --3 19.94 quinn ellison --3 60.28 quinn polk +-3 63.49 holly steinbeck +-3 64.29 irene young +-3 65.5 yuri xylophone +-3 68.73 holly nixon +-3 7.1 ulysses carson +-3 71.78 wendy robinson +-3 71.97 alice davidson +-3 72.26 nick nixon +-3 72.3 bob underhill +-3 73.83 bob ichabod +-3 74.92 mike king +-3 76.31 alice allen +-3 8.37 bob ellison +-3 80.65 gabriella brown +-3 81.39 tom steinbeck +-3 81.64 tom hernandez +-3 81.68 david hernandez -3 81.95 rachel davidson --3 0.56 rachel thompson --3 58.81 sarah miller +-3 81.97 mike xylophone +-3 82.74 holly polk +-3 83.39 mike allen -3 84.39 sarah robinson --3 38.22 sarah xylophone --3 97.88 sarah zipper --3 81.64 tom hernandez +-3 84.75 holly thompson +-3 85.96 jessica miller +-3 88.78 katie ichabod +-3 9.86 holly underhill +-3 90.3 irene underhill +-3 92.71 jessica king +-3 94.22 oscar johnson +-3 96.78 fred ellison -3 97.51 tom hernandez --3 56.05 tom polk --3 81.39 tom steinbeck --3 7.1 ulysses carson --3 59.34 ulysses ellison --3 10.26 ulysses quirinius --3 25.67 ulysses robinson --3 22.34 ulysses steinbeck --3 43.4 victor allen --3 43.69 victor hernandez --3 61.81 victor robinson --3 52.13 victor thompson --3 62.39 victor xylophone --3 25.5 wendy quirinius --3 71.78 wendy robinson --3 13.67 wendy xylophone --3 28.75 xavier garcia --3 28.78 xavier ovid --3 65.5 yuri xylophone --3 12.12 zach thompson --3 18.27 zach young --2 87.78 alice carson --2 16.98 alice nixon --2 6.49 alice underhill --2 26.68 alice underhill --2 48.3 alice xylophone --2 99.45 bob falkner --2 9.81 bob king --2 67.12 bob ovid --2 20.94 bob van buren --2 19.69 bob xylophone --2 76.54 calvin xylophone --2 1.79 david falkner --2 25.48 david laertes --2 94.57 david miller +-3 97.88 sarah zipper PREHOOK: query: select ctinyint, cstring1, cboolean2 from text_llap100 order by ctinyint, cstring1, cboolean2 PREHOOK: type: QUERY PREHOOK: Input: default@text_llap100 @@ -540,73 
+540,67 @@ POSTHOOK: query: select ctinyint, cstring1, cboolean2 from text_llap100 order by POSTHOOK: type: QUERY POSTHOOK: Input: default@text_llap100 #### A masked pattern was here #### -NULL cvLH6Eat2yFsyy7p NULL -NULL cvLH6Eat2yFsyy7p NULL --62 cvLH6Eat2yFsyy7p NULL --59 cvLH6Eat2yFsyy7p NULL --57 cvLH6Eat2yFsyy7p NULL --56 cvLH6Eat2yFsyy7p NULL --56 cvLH6Eat2yFsyy7p NULL --55 cvLH6Eat2yFsyy7p NULL --55 cvLH6Eat2yFsyy7p NULL --54 cvLH6Eat2yFsyy7p NULL --53 cvLH6Eat2yFsyy7p NULL --51 cvLH6Eat2yFsyy7p NULL --50 cvLH6Eat2yFsyy7p NULL --48 cvLH6Eat2yFsyy7p NULL --48 cvLH6Eat2yFsyy7p NULL --47 cvLH6Eat2yFsyy7p NULL --45 cvLH6Eat2yFsyy7p NULL --45 cvLH6Eat2yFsyy7p NULL --44 cvLH6Eat2yFsyy7p NULL --43 cvLH6Eat2yFsyy7p NULL --40 cvLH6Eat2yFsyy7p NULL --37 cvLH6Eat2yFsyy7p NULL --36 cvLH6Eat2yFsyy7p NULL --34 cvLH6Eat2yFsyy7p NULL --34 cvLH6Eat2yFsyy7p NULL --33 cvLH6Eat2yFsyy7p NULL --33 cvLH6Eat2yFsyy7p NULL --32 cvLH6Eat2yFsyy7p NULL --30 cvLH6Eat2yFsyy7p NULL --28 cvLH6Eat2yFsyy7p NULL --28 cvLH6Eat2yFsyy7p NULL --24 cvLH6Eat2yFsyy7p NULL --23 cvLH6Eat2yFsyy7p NULL --23 cvLH6Eat2yFsyy7p NULL --23 cvLH6Eat2yFsyy7p NULL --22 cvLH6Eat2yFsyy7p NULL --22 cvLH6Eat2yFsyy7p NULL --22 cvLH6Eat2yFsyy7p NULL --21 cvLH6Eat2yFsyy7p NULL --21 cvLH6Eat2yFsyy7p NULL --19 cvLH6Eat2yFsyy7p NULL --16 cvLH6Eat2yFsyy7p NULL --16 cvLH6Eat2yFsyy7p NULL --13 cvLH6Eat2yFsyy7p NULL --12 cvLH6Eat2yFsyy7p NULL +-1 cvLH6Eat2yFsyy7p NULL -11 cvLH6Eat2yFsyy7p NULL -11 cvLH6Eat2yFsyy7p NULL -11 cvLH6Eat2yFsyy7p NULL --7 cvLH6Eat2yFsyy7p NULL +-12 cvLH6Eat2yFsyy7p NULL +-13 cvLH6Eat2yFsyy7p NULL +-16 cvLH6Eat2yFsyy7p NULL +-16 cvLH6Eat2yFsyy7p NULL +-19 cvLH6Eat2yFsyy7p NULL +-21 cvLH6Eat2yFsyy7p NULL +-21 cvLH6Eat2yFsyy7p NULL +-22 cvLH6Eat2yFsyy7p NULL +-22 cvLH6Eat2yFsyy7p NULL +-22 cvLH6Eat2yFsyy7p NULL +-23 cvLH6Eat2yFsyy7p NULL +-23 cvLH6Eat2yFsyy7p NULL +-23 cvLH6Eat2yFsyy7p NULL +-24 cvLH6Eat2yFsyy7p NULL +-28 cvLH6Eat2yFsyy7p NULL +-28 cvLH6Eat2yFsyy7p NULL +-30 cvLH6Eat2yFsyy7p NULL +-32 cvLH6Eat2yFsyy7p NULL +-33 cvLH6Eat2yFsyy7p NULL +-33 cvLH6Eat2yFsyy7p NULL +-34 cvLH6Eat2yFsyy7p NULL +-34 cvLH6Eat2yFsyy7p NULL +-36 cvLH6Eat2yFsyy7p NULL +-37 cvLH6Eat2yFsyy7p NULL +-4 cvLH6Eat2yFsyy7p NULL +-4 cvLH6Eat2yFsyy7p NULL +-40 cvLH6Eat2yFsyy7p NULL +-43 cvLH6Eat2yFsyy7p NULL +-44 cvLH6Eat2yFsyy7p NULL +-45 cvLH6Eat2yFsyy7p NULL +-45 cvLH6Eat2yFsyy7p NULL +-47 cvLH6Eat2yFsyy7p NULL +-48 cvLH6Eat2yFsyy7p NULL +-48 cvLH6Eat2yFsyy7p NULL -5 cvLH6Eat2yFsyy7p NULL -5 cvLH6Eat2yFsyy7p NULL -5 cvLH6Eat2yFsyy7p NULL --4 cvLH6Eat2yFsyy7p NULL --4 cvLH6Eat2yFsyy7p NULL --1 cvLH6Eat2yFsyy7p NULL +-50 cvLH6Eat2yFsyy7p NULL +-51 cvLH6Eat2yFsyy7p NULL +-53 cvLH6Eat2yFsyy7p NULL +-54 cvLH6Eat2yFsyy7p NULL +-55 cvLH6Eat2yFsyy7p NULL +-55 cvLH6Eat2yFsyy7p NULL +-56 cvLH6Eat2yFsyy7p NULL +-56 cvLH6Eat2yFsyy7p NULL +-57 cvLH6Eat2yFsyy7p NULL +-59 cvLH6Eat2yFsyy7p NULL +-62 cvLH6Eat2yFsyy7p NULL +-7 cvLH6Eat2yFsyy7p NULL 0 cvLH6Eat2yFsyy7p NULL 0 cvLH6Eat2yFsyy7p NULL -2 cvLH6Eat2yFsyy7p NULL -4 cvLH6Eat2yFsyy7p NULL -5 cvLH6Eat2yFsyy7p NULL -8 cvLH6Eat2yFsyy7p NULL -9 cvLH6Eat2yFsyy7p NULL 10 cvLH6Eat2yFsyy7p NULL 13 cvLH6Eat2yFsyy7p NULL 16 cvLH6Eat2yFsyy7p NULL 18 cvLH6Eat2yFsyy7p NULL 19 cvLH6Eat2yFsyy7p NULL +2 cvLH6Eat2yFsyy7p NULL 21 cvLH6Eat2yFsyy7p NULL 24 cvLH6Eat2yFsyy7p NULL 24 cvLH6Eat2yFsyy7p NULL @@ -627,11 +621,13 @@ NULL cvLH6Eat2yFsyy7p NULL 38 cvLH6Eat2yFsyy7p NULL 38 cvLH6Eat2yFsyy7p NULL 39 cvLH6Eat2yFsyy7p NULL +4 cvLH6Eat2yFsyy7p NULL 40 cvLH6Eat2yFsyy7p NULL 40 cvLH6Eat2yFsyy7p NULL 41 cvLH6Eat2yFsyy7p NULL 43 
cvLH6Eat2yFsyy7p NULL 46 cvLH6Eat2yFsyy7p NULL +5 cvLH6Eat2yFsyy7p NULL 51 cvLH6Eat2yFsyy7p NULL 51 cvLH6Eat2yFsyy7p NULL 53 cvLH6Eat2yFsyy7p NULL @@ -640,6 +636,10 @@ NULL cvLH6Eat2yFsyy7p NULL 61 cvLH6Eat2yFsyy7p NULL 61 cvLH6Eat2yFsyy7p NULL 62 cvLH6Eat2yFsyy7p NULL +8 cvLH6Eat2yFsyy7p NULL +9 cvLH6Eat2yFsyy7p NULL +NULL cvLH6Eat2yFsyy7p NULL +NULL cvLH6Eat2yFsyy7p NULL PREHOOK: query: select * from text_llap100 order by cint, cstring1, cstring2 PREHOOK: type: QUERY PREHOOK: Input: default@text_llap100 @@ -648,184 +648,194 @@ POSTHOOK: query: select * from text_llap100 order by cint, cstring1, cstring2 POSTHOOK: type: QUERY POSTHOOK: Input: default@text_llap100 #### A masked pattern was here #### --50 -13326 528534767 NULL -50.0 -13326.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:46.674 1969-12-31 16:00:08.875 true NULL -NULL -4213 528534767 NULL NULL -4213.0 cvLH6Eat2yFsyy7p NULL NULL 1969-12-31 16:00:13.589 true NULL --28 -15813 528534767 NULL -28.0 -15813.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:55.787 1969-12-31 16:00:01.546 true NULL -31 -9566 528534767 NULL 31.0 -9566.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:44.187 1969-12-31 16:00:06.961 true NULL --34 15007 528534767 NULL -34.0 15007.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:50.434 1969-12-31 16:00:13.352 true NULL -29 7021 528534767 NULL 29.0 7021.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:15.007 1969-12-31 16:00:15.148 true NULL -31 4963 528534767 NULL 31.0 4963.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:07.021 1969-12-31 16:00:02.997 true NULL -27 -7824 528534767 NULL 27.0 -7824.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:04.963 1969-12-31 15:59:56.474 true NULL +-1 -75 528534767 NULL -1.389 -863.257 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:49.331 1969-12-31 16:00:07.585 true NULL -11 -15431 528534767 NULL -11.0 -15431.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.176 1969-12-31 16:00:07.787 true NULL -61 -15549 528534767 NULL 61.0 -15549.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:44.569 1969-12-31 15:59:51.665 true NULL -16 5780 528534767 NULL 16.0 5780.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:44.451 1969-12-31 16:00:12.752 true NULL -5 14625 528534767 NULL 5.0 14625.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:05.78 1969-12-31 16:00:15.34 true NULL --23 13026 528534767 NULL -23.0 13026.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:14.625 1969-12-31 16:00:10.77 true NULL --51 -12083 528534767 NULL -51.0 -12083.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:13.026 1969-12-31 16:00:02.52 true NULL --11 9472 528534767 NULL -11.0 9472.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:47.917 1969-12-31 16:00:03.716 true NULL --48 -7735 528534767 NULL -48.0 -7735.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:09.472 1969-12-31 16:00:00.8 true NULL --62 10 528534767 NULL -62.0 10.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.265 1969-12-31 15:59:56.584 true NULL --45 5521 528534767 NULL -45.0 5521.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:00.01 1969-12-31 15:59:48.553 true NULL -40 -1724 528534767 NULL 40.0 -1724.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:05.521 1969-12-31 15:59:57.835 true NULL -39 -10909 528534767 NULL 39.0 -10909.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:58.276 1969-12-31 16:00:12.738 true NULL --32 11242 528534767 NULL -32.0 11242.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:49.091 1969-12-31 15:59:55.681 true NULL --56 8353 528534767 NULL -56.0 8353.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:11.242 1969-12-31 15:59:46.526 true NULL --7 2541 528534767 NULL -7.0 2541.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:08.353 1969-12-31 15:59:57.374 true NULL -24 4432 528534767 NULL 
24.0 4432.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:02.541 1969-12-31 16:00:10.895 true NULL -36 -15912 528534767 NULL 36.0 -15912.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:04.432 1969-12-31 16:00:04.376 true NULL --23 -10154 528534767 NULL -23.0 -10154.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:44.088 1969-12-31 15:59:56.086 true NULL --55 -7449 528534767 NULL -55.0 -7449.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:49.846 1969-12-31 15:59:55.75 true NULL -11 7476 528534767 NULL -11.0 7476.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.551 1969-12-31 15:59:57.567 true NULL -51 -4490 528534767 NULL 51.0 -4490.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:07.476 1969-12-31 15:59:49.318 true NULL --24 163 528534767 NULL -24.0 163.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:55.51 1969-12-31 16:00:04.014 true NULL --44 -1299 528534767 NULL -44.0 -1299.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:00.163 1969-12-31 15:59:47.687 true NULL -8 7860 528534767 NULL 8.0 7860.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:58.701 1969-12-31 16:00:01.97 true NULL -24 -4812 528534767 NULL 24.0 -4812.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:07.86 1969-12-31 15:59:55 true NULL -4 -14739 528534767 NULL 4.0 -14739.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:55.188 1969-12-31 16:00:15.26 true NULL --57 -11492 528534767 NULL -57.0 -11492.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:45.261 1969-12-31 16:00:05.306 true NULL --22 3856 528534767 NULL -22.0 3856.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:48.508 1969-12-31 15:59:54.534 true NULL -28 8035 528534767 NULL 28.0 8035.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:03.856 1969-12-31 15:59:55.95 true NULL --16 -7964 528534767 NULL -16.0 -7964.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:08.035 1969-12-31 16:00:12.464 true NULL -46 6958 528534767 NULL 46.0 6958.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.036 1969-12-31 16:00:10.191 true NULL -29 -1990 528534767 NULL 29.0 -1990.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:06.958 1969-12-31 15:59:52.902 true NULL --56 8402 528534767 NULL -56.0 8402.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:58.01 1969-12-31 16:00:05.146 true NULL +-11 9472 528534767 NULL -11.0 9472.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:47.917 1969-12-31 16:00:03.716 true NULL +-12 -2013 528534767 NULL -12.0 -2013.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:14.907 1969-12-31 15:59:58.789 true NULL +-13 -13372 528534767 NULL -13.0 -13372.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:08.499 1969-12-31 15:59:48.221 true NULL -16 -6922 528534767 NULL -16.0 -6922.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:08.402 1969-12-31 15:59:50.561 true NULL -38 -6583 528534767 NULL 38.0 -6583.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:53.078 1969-12-31 16:00:06.722 true NULL --54 -10268 528534767 NULL -54.0 -10268.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:53.417 1969-12-31 16:00:00.687 true NULL --23 4587 528534767 NULL -23.0 4587.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:49.732 1969-12-31 15:59:48.52 true NULL +-16 -7964 528534767 NULL -16.0 -7964.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:08.035 1969-12-31 16:00:12.464 true NULL -19 1206 528534767 NULL -19.0 1206.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:04.587 1969-12-31 16:00:08.381 true NULL -40 -7984 528534767 NULL 40.0 -7984.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:01.206 1969-12-31 16:00:02.59 true NULL -62 6557 528534767 NULL 62.0 6557.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.016 1969-12-31 16:00:00.367 true NULL --34 4181 528534767 NULL -34.0 4181.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:06.557 1969-12-31 16:00:04.869 true NULL -53 -10129 528534767 NULL 53.0 
-10129.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:04.181 1969-12-31 16:00:08.061 true NULL -51 -15790 528534767 NULL 51.0 -15790.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:49.871 1969-12-31 15:59:57.821 true NULL --4 2617 528534767 NULL -4.0 2617.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:44.21 1969-12-31 15:59:44.733 true NULL -61 12161 528534767 NULL 61.0 12161.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:02.617 1969-12-31 16:00:10.536 true NULL -19 7952 528534767 NULL 19.0 7952.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:12.161 1969-12-31 16:00:00.95 true NULL +-21 -7183 528534767 NULL -21.0 -7183.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:45.035 1969-12-31 16:00:06.182 true NULL +-21 3168 528534767 NULL -21.0 3168.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:56.834 1969-12-31 16:00:13.331 true NULL +-22 3856 528534767 NULL -22.0 3856.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:48.508 1969-12-31 15:59:54.534 true NULL +-22 77 528534767 NULL -22.0 77.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:45.928 1969-12-31 15:59:43.621 true NULL +-22 8499 528534767 NULL -22.0 8499.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:15.626 1969-12-31 16:00:10.923 true NULL +-23 -10154 528534767 NULL -23.0 -10154.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:44.088 1969-12-31 15:59:56.086 true NULL +-23 13026 528534767 NULL -23.0 13026.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:14.625 1969-12-31 16:00:10.77 true NULL +-23 4587 528534767 NULL -23.0 4587.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:49.732 1969-12-31 15:59:48.52 true NULL +-24 163 528534767 NULL -24.0 163.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:55.51 1969-12-31 16:00:04.014 true NULL +-28 -15813 528534767 NULL -28.0 -15813.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:55.787 1969-12-31 16:00:01.546 true NULL +-28 6453 528534767 NULL -28.0 6453.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:01.475 1969-12-31 16:00:07.828 true NULL +-30 834 528534767 NULL -30.0 834.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:14.072 1969-12-31 16:00:03.004 true NULL +-32 11242 528534767 NULL -32.0 11242.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:49.091 1969-12-31 15:59:55.681 true NULL +-33 14072 528534767 NULL -33.0 14072.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:03.168 1969-12-31 15:59:55.836 true NULL -33 7350 528534767 NULL -33.0 7350.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:07.952 1969-12-31 15:59:48.183 true NULL -53 -12171 528534767 NULL 53.0 -12171.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:07.35 1969-12-31 15:59:57.549 true NULL -18 -3045 528534767 NULL 18.0 -3045.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:47.829 1969-12-31 16:00:05.045 true NULL -30 -814 528534767 NULL 30.0 -814.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:56.955 1969-12-31 16:00:11.799 true NULL +-34 15007 528534767 NULL -34.0 15007.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:50.434 1969-12-31 16:00:13.352 true NULL +-34 4181 528534767 NULL -34.0 4181.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:06.557 1969-12-31 16:00:04.869 true NULL -36 1639 528534767 NULL -36.0 1639.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:00.186 1969-12-31 16:00:13.098 true NULL -34 -15059 528534767 NULL 34.0 -15059.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:01.639 1969-12-31 16:00:13.206 true NULL --55 -7353 528534767 NULL -55.0 -7353.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:44.941 1969-12-31 15:59:54.268 true NULL +-37 -12472 528534767 NULL -37.0 -12472.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:13.3 1969-12-31 15:59:55.998 true NULL +-4 -1027 528534767 NULL -4.0 -1027.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:46.628 1969-12-31 16:00:11.413 true NULL +-4 2617 528534767 NULL -4.0 2617.0 
cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:44.21 1969-12-31 15:59:44.733 true NULL -40 -4463 528534767 NULL -40.0 -4463.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.647 1969-12-31 15:59:46.254 true NULL -21 11737 528534767 NULL 21.0 11737.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:55.537 1969-12-31 15:59:45.022 true NULL -61 -1254 528534767 NULL 61.0 -1254.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:11.737 1969-12-31 16:00:12.004 true NULL --59 10688 528534767 NULL -59.0 10688.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:58.746 1969-12-31 16:00:15.489 true NULL -0 -3166 528534767 NULL 0.0 -3166.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:10.688 1969-12-31 16:00:01.385 true NULL --21 3168 528534767 NULL -21.0 3168.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:56.834 1969-12-31 16:00:13.331 true NULL --33 14072 528534767 NULL -33.0 14072.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:03.168 1969-12-31 15:59:55.836 true NULL --30 834 528534767 NULL -30.0 834.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:14.072 1969-12-31 16:00:03.004 true NULL +-43 486 528534767 NULL -43.0 486.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:01.345 1969-12-31 15:59:52.667 true NULL +-44 -1299 528534767 NULL -44.0 -1299.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:00.163 1969-12-31 15:59:47.687 true NULL +-45 -14072 528534767 NULL -45.0 -14072.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:45.621 1969-12-31 15:59:45.914 true NULL +-45 5521 528534767 NULL -45.0 5521.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:00.01 1969-12-31 15:59:48.553 true NULL +-47 -2468 528534767 NULL -47.0 -2468.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:48.68 1969-12-31 16:00:02.94 true NULL +-48 -7735 528534767 NULL -48.0 -7735.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:09.472 1969-12-31 16:00:00.8 true NULL +-48 13300 528534767 NULL -48.0 13300.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:00.077 1969-12-31 15:59:45.827 true NULL -5 -13229 528534767 NULL -5.0 -13229.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:00.834 1969-12-31 16:00:00.388 true NULL --53 -3419 528534767 NULL -53.0 -3419.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:46.771 1969-12-31 15:59:53.744 true NULL -34 -4255 528534767 NULL 34.0 -4255.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:56.581 1969-12-31 15:59:57.88 true NULL --5 12422 528534767 NULL -5.0 12422.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:55.745 1969-12-31 15:59:48.802 true NULL -27 -14965 528534767 NULL 27.0 -14965.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:12.422 1969-12-31 16:00:09.517 true NULL -NULL -3012 528534767 NULL NULL -3012.0 cvLH6Eat2yFsyy7p NULL NULL 1969-12-31 16:00:03.756 true NULL --21 -7183 528534767 NULL -21.0 -7183.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:45.035 1969-12-31 16:00:06.182 true NULL -43 1475 528534767 NULL 43.0 1475.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:56.988 1969-12-31 16:00:03.442 true NULL -41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL --28 6453 528534767 NULL -28.0 6453.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:01.475 1969-12-31 16:00:07.828 true NULL -5 -14379 528534767 NULL -5.0 -14379.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:00.037 1969-12-31 15:59:49.141 true NULL -13 1358 528534767 NULL 13.0 1358.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:06.453 1969-12-31 16:00:00.423 true NULL --45 -14072 528534767 NULL -45.0 -14072.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:45.621 1969-12-31 15:59:45.914 true NULL +-5 12422 528534767 NULL -5.0 12422.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:55.745 1969-12-31 15:59:48.802 true NULL +-50 -13326 528534767 NULL -50.0 -13326.0 cvLH6Eat2yFsyy7p NULL 
1969-12-31 15:59:46.674 1969-12-31 16:00:08.875 true NULL +-51 -12083 528534767 NULL -51.0 -12083.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:13.026 1969-12-31 16:00:02.52 true NULL +-53 -3419 528534767 NULL -53.0 -3419.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:46.771 1969-12-31 15:59:53.744 true NULL +-54 -10268 528534767 NULL -54.0 -10268.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:53.417 1969-12-31 16:00:00.687 true NULL +-55 -7353 528534767 NULL -55.0 -7353.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:44.941 1969-12-31 15:59:54.268 true NULL +-55 -7449 528534767 NULL -55.0 -7449.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:49.846 1969-12-31 15:59:55.75 true NULL +-56 8353 528534767 NULL -56.0 8353.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:11.242 1969-12-31 15:59:46.526 true NULL +-56 8402 528534767 NULL -56.0 8402.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:58.01 1969-12-31 16:00:05.146 true NULL +-57 -11492 528534767 NULL -57.0 -11492.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:45.261 1969-12-31 16:00:05.306 true NULL +-59 10688 528534767 NULL -59.0 10688.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:58.746 1969-12-31 16:00:15.489 true NULL +-62 10 528534767 NULL -62.0 10.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.265 1969-12-31 15:59:56.584 true NULL +-7 2541 528534767 NULL -7.0 2541.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:08.353 1969-12-31 15:59:57.374 true NULL +0 -3166 528534767 NULL 0.0 -3166.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:10.688 1969-12-31 16:00:01.385 true NULL +0 15626 528534767 NULL 0.0 15626.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:09.566 1969-12-31 16:00:15.217 true NULL 10 9366 528534767 NULL 10.0 9366.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:01.358 1969-12-31 15:59:50.592 true NULL --22 77 528534767 NULL -22.0 77.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:45.928 1969-12-31 15:59:43.621 true NULL -38 -4667 528534767 NULL 38.0 -4667.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:09.366 1969-12-31 15:59:52.334 true NULL --48 13300 528534767 NULL -48.0 13300.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:00.077 1969-12-31 15:59:45.827 true NULL +13 1358 528534767 NULL 13.0 1358.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:06.453 1969-12-31 16:00:00.423 true NULL +16 5780 528534767 NULL 16.0 5780.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:44.451 1969-12-31 16:00:12.752 true NULL +18 -3045 528534767 NULL 18.0 -3045.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:47.829 1969-12-31 16:00:05.045 true NULL +19 7952 528534767 NULL 19.0 7952.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:12.161 1969-12-31 16:00:00.95 true NULL 2 1345 528534767 NULL 2.0 1345.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:55.333 1969-12-31 16:00:00.517 true NULL --37 -12472 528534767 NULL -37.0 -12472.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:13.3 1969-12-31 15:59:55.998 true NULL --43 486 528534767 NULL -43.0 486.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:01.345 1969-12-31 15:59:52.667 true NULL -36 14907 528534767 NULL 36.0 14907.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:47.528 1969-12-31 15:59:47.206 true NULL --1 -75 528534767 NULL -1.389 -863.257 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:49.331 1969-12-31 16:00:07.585 true NULL --12 -2013 528534767 NULL -12.0 -2013.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:14.907 1969-12-31 15:59:58.789 true NULL -0 15626 528534767 NULL 0.0 15626.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:09.566 1969-12-31 16:00:15.217 true NULL +21 11737 528534767 NULL 21.0 11737.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:55.537 1969-12-31 15:59:45.022 true NULL +24 -4812 528534767 NULL 24.0 -4812.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:07.86 
1969-12-31 15:59:55 true NULL +24 4432 528534767 NULL 24.0 4432.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:02.541 1969-12-31 16:00:10.895 true NULL 26 3961 528534767 NULL 26.0 3961.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:57.987 1969-12-31 15:59:52.232 true NULL --22 8499 528534767 NULL -22.0 8499.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:15.626 1969-12-31 16:00:10.923 true NULL -9 9169 528534767 NULL 9.0 9169.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:03.961 1969-12-31 16:00:14.126 true NULL --13 -13372 528534767 NULL -13.0 -13372.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:08.499 1969-12-31 15:59:48.221 true NULL +27 -14965 528534767 NULL 27.0 -14965.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:12.422 1969-12-31 16:00:09.517 true NULL +27 -7824 528534767 NULL 27.0 -7824.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:04.963 1969-12-31 15:59:56.474 true NULL +28 8035 528534767 NULL 28.0 8035.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:03.856 1969-12-31 15:59:55.95 true NULL +29 -1990 528534767 NULL 29.0 -1990.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:06.958 1969-12-31 15:59:52.902 true NULL +29 7021 528534767 NULL 29.0 7021.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:15.007 1969-12-31 16:00:15.148 true NULL +30 -814 528534767 NULL 30.0 -814.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:56.955 1969-12-31 16:00:11.799 true NULL +31 -9566 528534767 NULL 31.0 -9566.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:44.187 1969-12-31 16:00:06.961 true NULL +31 4963 528534767 NULL 31.0 4963.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:07.021 1969-12-31 16:00:02.997 true NULL +34 -15059 528534767 NULL 34.0 -15059.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:01.639 1969-12-31 16:00:13.206 true NULL +34 -4255 528534767 NULL 34.0 -4255.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:56.581 1969-12-31 15:59:57.88 true NULL +36 -15912 528534767 NULL 36.0 -15912.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:04.432 1969-12-31 16:00:04.376 true NULL +36 14907 528534767 NULL 36.0 14907.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:47.528 1969-12-31 15:59:47.206 true NULL 38 -11320 528534767 NULL 38.0 -11320.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:09.169 1969-12-31 16:00:03.822 true NULL --4 -1027 528534767 NULL -4.0 -1027.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:46.628 1969-12-31 16:00:11.413 true NULL --47 -2468 528534767 NULL -47.0 -2468.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:48.68 1969-12-31 16:00:02.94 true NULL -PREHOOK: query: select csmallint, cstring1, cboolean2 from text_llap100 order by csmallint, cstring1, cboolean2 -PREHOOK: type: QUERY -PREHOOK: Input: default@text_llap100 -#### A masked pattern was here #### -POSTHOOK: query: select csmallint, cstring1, cboolean2 from text_llap100 order by csmallint, cstring1, cboolean2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@text_llap100 -#### A masked pattern was here #### --15912 cvLH6Eat2yFsyy7p NULL --15813 cvLH6Eat2yFsyy7p NULL --15790 cvLH6Eat2yFsyy7p NULL --15549 cvLH6Eat2yFsyy7p NULL --15431 cvLH6Eat2yFsyy7p NULL --15059 cvLH6Eat2yFsyy7p NULL --14965 cvLH6Eat2yFsyy7p NULL --14739 cvLH6Eat2yFsyy7p NULL --14379 cvLH6Eat2yFsyy7p NULL --14072 cvLH6Eat2yFsyy7p NULL --13372 cvLH6Eat2yFsyy7p NULL --13326 cvLH6Eat2yFsyy7p NULL --13229 cvLH6Eat2yFsyy7p NULL --12472 cvLH6Eat2yFsyy7p NULL --12171 cvLH6Eat2yFsyy7p NULL --12083 cvLH6Eat2yFsyy7p NULL --11492 cvLH6Eat2yFsyy7p NULL --11320 cvLH6Eat2yFsyy7p NULL --10909 cvLH6Eat2yFsyy7p NULL --10268 cvLH6Eat2yFsyy7p NULL --10154 cvLH6Eat2yFsyy7p NULL --10129 cvLH6Eat2yFsyy7p NULL --9566 cvLH6Eat2yFsyy7p NULL --7984 cvLH6Eat2yFsyy7p NULL --7964 cvLH6Eat2yFsyy7p NULL 
--7824 cvLH6Eat2yFsyy7p NULL --7735 cvLH6Eat2yFsyy7p NULL --7449 cvLH6Eat2yFsyy7p NULL --7353 cvLH6Eat2yFsyy7p NULL --7183 cvLH6Eat2yFsyy7p NULL --6922 cvLH6Eat2yFsyy7p NULL --6583 cvLH6Eat2yFsyy7p NULL --4812 cvLH6Eat2yFsyy7p NULL --4667 cvLH6Eat2yFsyy7p NULL --4490 cvLH6Eat2yFsyy7p NULL --4463 cvLH6Eat2yFsyy7p NULL --4255 cvLH6Eat2yFsyy7p NULL --4213 cvLH6Eat2yFsyy7p NULL --3419 cvLH6Eat2yFsyy7p NULL --3166 cvLH6Eat2yFsyy7p NULL --3045 cvLH6Eat2yFsyy7p NULL --3012 cvLH6Eat2yFsyy7p NULL --2468 cvLH6Eat2yFsyy7p NULL --2013 cvLH6Eat2yFsyy7p NULL --1990 cvLH6Eat2yFsyy7p NULL --1724 cvLH6Eat2yFsyy7p NULL --1299 cvLH6Eat2yFsyy7p NULL --1254 cvLH6Eat2yFsyy7p NULL +38 -4667 528534767 NULL 38.0 -4667.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:09.366 1969-12-31 15:59:52.334 true NULL +38 -6583 528534767 NULL 38.0 -6583.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:53.078 1969-12-31 16:00:06.722 true NULL +39 -10909 528534767 NULL 39.0 -10909.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:58.276 1969-12-31 16:00:12.738 true NULL +4 -14739 528534767 NULL 4.0 -14739.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:55.188 1969-12-31 16:00:15.26 true NULL +40 -1724 528534767 NULL 40.0 -1724.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:05.521 1969-12-31 15:59:57.835 true NULL +40 -7984 528534767 NULL 40.0 -7984.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:01.206 1969-12-31 16:00:02.59 true NULL +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +43 1475 528534767 NULL 43.0 1475.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:56.988 1969-12-31 16:00:03.442 true NULL +46 6958 528534767 NULL 46.0 6958.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.036 1969-12-31 16:00:10.191 true NULL +5 14625 528534767 NULL 5.0 14625.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:05.78 1969-12-31 16:00:15.34 true NULL +51 -15790 528534767 NULL 51.0 -15790.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:49.871 1969-12-31 15:59:57.821 true NULL +51 -4490 528534767 NULL 51.0 -4490.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:07.476 1969-12-31 15:59:49.318 true NULL +53 -10129 528534767 NULL 53.0 -10129.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:04.181 1969-12-31 16:00:08.061 true NULL +53 -12171 528534767 NULL 53.0 -12171.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:07.35 1969-12-31 15:59:57.549 true NULL +61 -1254 528534767 NULL 61.0 -1254.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:11.737 1969-12-31 16:00:12.004 true NULL +61 -15549 528534767 NULL 61.0 -15549.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:44.569 1969-12-31 15:59:51.665 true NULL +61 12161 528534767 NULL 61.0 12161.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:02.617 1969-12-31 16:00:10.536 true NULL +62 6557 528534767 NULL 62.0 6557.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.016 1969-12-31 16:00:00.367 true NULL +8 7860 528534767 NULL 8.0 7860.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:58.701 1969-12-31 16:00:01.97 true NULL +9 9169 528534767 NULL 9.0 9169.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:03.961 1969-12-31 16:00:14.126 true NULL +NULL -3012 528534767 NULL NULL -3012.0 cvLH6Eat2yFsyy7p NULL NULL 1969-12-31 16:00:03.756 true NULL +NULL -4213 528534767 NULL NULL -4213.0 cvLH6Eat2yFsyy7p NULL NULL 1969-12-31 16:00:13.589 true NULL +PREHOOK: query: select csmallint, cstring1, cboolean2 from text_llap100 order by csmallint, cstring1, cboolean2 +PREHOOK: type: QUERY +PREHOOK: Input: default@text_llap100 +#### A masked pattern was here #### +POSTHOOK: query: select csmallint, cstring1, cboolean2 from text_llap100 order by csmallint, cstring1, cboolean2 +POSTHOOK: type: 
QUERY +POSTHOOK: Input: default@text_llap100 +#### A masked pattern was here #### +-10129 cvLH6Eat2yFsyy7p NULL +-10154 cvLH6Eat2yFsyy7p NULL +-10268 cvLH6Eat2yFsyy7p NULL -1027 cvLH6Eat2yFsyy7p NULL --814 cvLH6Eat2yFsyy7p NULL +-10909 cvLH6Eat2yFsyy7p NULL +-11320 cvLH6Eat2yFsyy7p NULL +-11492 cvLH6Eat2yFsyy7p NULL +-12083 cvLH6Eat2yFsyy7p NULL +-12171 cvLH6Eat2yFsyy7p NULL +-12472 cvLH6Eat2yFsyy7p NULL +-1254 cvLH6Eat2yFsyy7p NULL +-1299 cvLH6Eat2yFsyy7p NULL +-13229 cvLH6Eat2yFsyy7p NULL +-13326 cvLH6Eat2yFsyy7p NULL +-13372 cvLH6Eat2yFsyy7p NULL +-14072 cvLH6Eat2yFsyy7p NULL +-14379 cvLH6Eat2yFsyy7p NULL +-14739 cvLH6Eat2yFsyy7p NULL +-14965 cvLH6Eat2yFsyy7p NULL +-15059 cvLH6Eat2yFsyy7p NULL +-15431 cvLH6Eat2yFsyy7p NULL +-15549 cvLH6Eat2yFsyy7p NULL +-15790 cvLH6Eat2yFsyy7p NULL +-15813 cvLH6Eat2yFsyy7p NULL +-15912 cvLH6Eat2yFsyy7p NULL +-1724 cvLH6Eat2yFsyy7p NULL +-1990 cvLH6Eat2yFsyy7p NULL +-2013 cvLH6Eat2yFsyy7p NULL +-2468 cvLH6Eat2yFsyy7p NULL +-3012 cvLH6Eat2yFsyy7p NULL +-3045 cvLH6Eat2yFsyy7p NULL +-3166 cvLH6Eat2yFsyy7p NULL +-3419 cvLH6Eat2yFsyy7p NULL +-4213 cvLH6Eat2yFsyy7p NULL +-4255 cvLH6Eat2yFsyy7p NULL +-4463 cvLH6Eat2yFsyy7p NULL +-4490 cvLH6Eat2yFsyy7p NULL +-4667 cvLH6Eat2yFsyy7p NULL +-4812 cvLH6Eat2yFsyy7p NULL +-6583 cvLH6Eat2yFsyy7p NULL +-6922 cvLH6Eat2yFsyy7p NULL +-7183 cvLH6Eat2yFsyy7p NULL +-7353 cvLH6Eat2yFsyy7p NULL +-7449 cvLH6Eat2yFsyy7p NULL -75 cvLH6Eat2yFsyy7p NULL +-7735 cvLH6Eat2yFsyy7p NULL +-7824 cvLH6Eat2yFsyy7p NULL +-7964 cvLH6Eat2yFsyy7p NULL +-7984 cvLH6Eat2yFsyy7p NULL +-814 cvLH6Eat2yFsyy7p NULL +-9566 cvLH6Eat2yFsyy7p NULL 10 cvLH6Eat2yFsyy7p NULL -37 cvLH6Eat2yFsyy7p NULL -77 cvLH6Eat2yFsyy7p NULL -163 cvLH6Eat2yFsyy7p NULL -486 cvLH6Eat2yFsyy7p NULL -834 cvLH6Eat2yFsyy7p NULL +10688 cvLH6Eat2yFsyy7p NULL +11242 cvLH6Eat2yFsyy7p NULL +11737 cvLH6Eat2yFsyy7p NULL 1206 cvLH6Eat2yFsyy7p NULL +12161 cvLH6Eat2yFsyy7p NULL +12422 cvLH6Eat2yFsyy7p NULL +13026 cvLH6Eat2yFsyy7p NULL +13300 cvLH6Eat2yFsyy7p NULL 1345 cvLH6Eat2yFsyy7p NULL 1358 cvLH6Eat2yFsyy7p NULL +14072 cvLH6Eat2yFsyy7p NULL +14625 cvLH6Eat2yFsyy7p NULL 1475 cvLH6Eat2yFsyy7p NULL +14907 cvLH6Eat2yFsyy7p NULL +15007 cvLH6Eat2yFsyy7p NULL +15626 cvLH6Eat2yFsyy7p NULL +163 cvLH6Eat2yFsyy7p NULL 1639 cvLH6Eat2yFsyy7p NULL 2541 cvLH6Eat2yFsyy7p NULL 2617 cvLH6Eat2yFsyy7p NULL 3168 cvLH6Eat2yFsyy7p NULL +37 cvLH6Eat2yFsyy7p NULL 3856 cvLH6Eat2yFsyy7p NULL 3961 cvLH6Eat2yFsyy7p NULL 4181 cvLH6Eat2yFsyy7p NULL 4432 cvLH6Eat2yFsyy7p NULL 4587 cvLH6Eat2yFsyy7p NULL +486 cvLH6Eat2yFsyy7p NULL 4963 cvLH6Eat2yFsyy7p NULL 5521 cvLH6Eat2yFsyy7p NULL 5780 cvLH6Eat2yFsyy7p NULL @@ -835,27 +845,17 @@ POSTHOOK: Input: default@text_llap100 7021 cvLH6Eat2yFsyy7p NULL 7350 cvLH6Eat2yFsyy7p NULL 7476 cvLH6Eat2yFsyy7p NULL +77 cvLH6Eat2yFsyy7p NULL 7860 cvLH6Eat2yFsyy7p NULL 7952 cvLH6Eat2yFsyy7p NULL 8035 cvLH6Eat2yFsyy7p NULL +834 cvLH6Eat2yFsyy7p NULL 8353 cvLH6Eat2yFsyy7p NULL 8402 cvLH6Eat2yFsyy7p NULL 8499 cvLH6Eat2yFsyy7p NULL 9169 cvLH6Eat2yFsyy7p NULL 9366 cvLH6Eat2yFsyy7p NULL 9472 cvLH6Eat2yFsyy7p NULL -10688 cvLH6Eat2yFsyy7p NULL -11242 cvLH6Eat2yFsyy7p NULL -11737 cvLH6Eat2yFsyy7p NULL -12161 cvLH6Eat2yFsyy7p NULL -12422 cvLH6Eat2yFsyy7p NULL -13026 cvLH6Eat2yFsyy7p NULL -13300 cvLH6Eat2yFsyy7p NULL -14072 cvLH6Eat2yFsyy7p NULL -14625 cvLH6Eat2yFsyy7p NULL -14907 cvLH6Eat2yFsyy7p NULL -15007 cvLH6Eat2yFsyy7p NULL -15626 cvLH6Eat2yFsyy7p NULL PREHOOK: query: select t, s, ts from text_llap2 order by t, s, ts limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@text_llap2 @@ -864,6 
+864,20 @@ POSTHOOK: query: select t, s, ts from text_llap2 order by t, s, ts limit 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@text_llap2 #### A masked pattern was here #### +-2 alice carson 2013-03-01 09:11:58.703074 +-2 alice nixon 2013-03-01 09:11:58.703321 +-2 alice underhill 2013-03-01 09:11:58.703122 +-2 alice underhill 2013-03-01 09:11:58.703127 +-2 alice xylophone 2013-03-01 09:11:58.703105 +-2 bob falkner 2013-03-01 09:11:58.703071 +-2 bob king 2013-03-01 09:11:58.703236 +-2 bob ovid 2013-03-01 09:11:58.703285 +-2 bob van buren 2013-03-01 09:11:58.703218 +-2 bob xylophone 2013-03-01 09:11:58.703219 +-2 calvin xylophone 2013-03-01 09:11:58.703083 +-2 david falkner 2013-03-01 09:11:58.703254 +-2 david laertes 2013-03-01 09:11:58.703076 +-2 david miller 2013-03-01 09:11:58.703238 -3 alice allen 2013-03-01 09:11:58.703323 -3 alice davidson 2013-03-01 09:11:58.703226 -3 alice falkner 2013-03-01 09:11:58.703304 @@ -950,20 +964,6 @@ POSTHOOK: Input: default@text_llap2 -3 yuri xylophone 2013-03-01 09:11:58.703258 -3 zach thompson 2013-03-01 09:11:58.703252 -3 zach young 2013-03-01 09:11:58.703191 --2 alice carson 2013-03-01 09:11:58.703074 --2 alice nixon 2013-03-01 09:11:58.703321 --2 alice underhill 2013-03-01 09:11:58.703122 --2 alice underhill 2013-03-01 09:11:58.703127 --2 alice xylophone 2013-03-01 09:11:58.703105 --2 bob falkner 2013-03-01 09:11:58.703071 --2 bob king 2013-03-01 09:11:58.703236 --2 bob ovid 2013-03-01 09:11:58.703285 --2 bob van buren 2013-03-01 09:11:58.703218 --2 bob xylophone 2013-03-01 09:11:58.703219 --2 calvin xylophone 2013-03-01 09:11:58.703083 --2 david falkner 2013-03-01 09:11:58.703254 --2 david laertes 2013-03-01 09:11:58.703076 --2 david miller 2013-03-01 09:11:58.703238 PREHOOK: query: select csmallint, cstring1, cboolean2 from text_llap100 order by csmallint, cstring1, cboolean2 PREHOOK: type: QUERY PREHOOK: Input: default@text_llap100 @@ -972,76 +972,86 @@ POSTHOOK: query: select csmallint, cstring1, cboolean2 from text_llap100 order b POSTHOOK: type: QUERY POSTHOOK: Input: default@text_llap100 #### A masked pattern was here #### --15912 cvLH6Eat2yFsyy7p NULL --15813 cvLH6Eat2yFsyy7p NULL --15790 cvLH6Eat2yFsyy7p NULL --15549 cvLH6Eat2yFsyy7p NULL --15431 cvLH6Eat2yFsyy7p NULL --15059 cvLH6Eat2yFsyy7p NULL --14965 cvLH6Eat2yFsyy7p NULL --14739 cvLH6Eat2yFsyy7p NULL --14379 cvLH6Eat2yFsyy7p NULL --14072 cvLH6Eat2yFsyy7p NULL --13372 cvLH6Eat2yFsyy7p NULL --13326 cvLH6Eat2yFsyy7p NULL --13229 cvLH6Eat2yFsyy7p NULL --12472 cvLH6Eat2yFsyy7p NULL --12171 cvLH6Eat2yFsyy7p NULL --12083 cvLH6Eat2yFsyy7p NULL --11492 cvLH6Eat2yFsyy7p NULL --11320 cvLH6Eat2yFsyy7p NULL --10909 cvLH6Eat2yFsyy7p NULL --10268 cvLH6Eat2yFsyy7p NULL --10154 cvLH6Eat2yFsyy7p NULL -10129 cvLH6Eat2yFsyy7p NULL --9566 cvLH6Eat2yFsyy7p NULL --7984 cvLH6Eat2yFsyy7p NULL --7964 cvLH6Eat2yFsyy7p NULL --7824 cvLH6Eat2yFsyy7p NULL --7735 cvLH6Eat2yFsyy7p NULL --7449 cvLH6Eat2yFsyy7p NULL --7353 cvLH6Eat2yFsyy7p NULL --7183 cvLH6Eat2yFsyy7p NULL --6922 cvLH6Eat2yFsyy7p NULL --6583 cvLH6Eat2yFsyy7p NULL --4812 cvLH6Eat2yFsyy7p NULL --4667 cvLH6Eat2yFsyy7p NULL --4490 cvLH6Eat2yFsyy7p NULL --4463 cvLH6Eat2yFsyy7p NULL --4255 cvLH6Eat2yFsyy7p NULL --4213 cvLH6Eat2yFsyy7p NULL --3419 cvLH6Eat2yFsyy7p NULL --3166 cvLH6Eat2yFsyy7p NULL --3045 cvLH6Eat2yFsyy7p NULL --3012 cvLH6Eat2yFsyy7p NULL --2468 cvLH6Eat2yFsyy7p NULL --2013 cvLH6Eat2yFsyy7p NULL --1990 cvLH6Eat2yFsyy7p NULL --1724 cvLH6Eat2yFsyy7p NULL --1299 cvLH6Eat2yFsyy7p NULL --1254 cvLH6Eat2yFsyy7p NULL +-10154 cvLH6Eat2yFsyy7p 
NULL +-10268 cvLH6Eat2yFsyy7p NULL -1027 cvLH6Eat2yFsyy7p NULL --814 cvLH6Eat2yFsyy7p NULL +-10909 cvLH6Eat2yFsyy7p NULL +-11320 cvLH6Eat2yFsyy7p NULL +-11492 cvLH6Eat2yFsyy7p NULL +-12083 cvLH6Eat2yFsyy7p NULL +-12171 cvLH6Eat2yFsyy7p NULL +-12472 cvLH6Eat2yFsyy7p NULL +-1254 cvLH6Eat2yFsyy7p NULL +-1299 cvLH6Eat2yFsyy7p NULL +-13229 cvLH6Eat2yFsyy7p NULL +-13326 cvLH6Eat2yFsyy7p NULL +-13372 cvLH6Eat2yFsyy7p NULL +-14072 cvLH6Eat2yFsyy7p NULL +-14379 cvLH6Eat2yFsyy7p NULL +-14739 cvLH6Eat2yFsyy7p NULL +-14965 cvLH6Eat2yFsyy7p NULL +-15059 cvLH6Eat2yFsyy7p NULL +-15431 cvLH6Eat2yFsyy7p NULL +-15549 cvLH6Eat2yFsyy7p NULL +-15790 cvLH6Eat2yFsyy7p NULL +-15813 cvLH6Eat2yFsyy7p NULL +-15912 cvLH6Eat2yFsyy7p NULL +-1724 cvLH6Eat2yFsyy7p NULL +-1990 cvLH6Eat2yFsyy7p NULL +-2013 cvLH6Eat2yFsyy7p NULL +-2468 cvLH6Eat2yFsyy7p NULL +-3012 cvLH6Eat2yFsyy7p NULL +-3045 cvLH6Eat2yFsyy7p NULL +-3166 cvLH6Eat2yFsyy7p NULL +-3419 cvLH6Eat2yFsyy7p NULL +-4213 cvLH6Eat2yFsyy7p NULL +-4255 cvLH6Eat2yFsyy7p NULL +-4463 cvLH6Eat2yFsyy7p NULL +-4490 cvLH6Eat2yFsyy7p NULL +-4667 cvLH6Eat2yFsyy7p NULL +-4812 cvLH6Eat2yFsyy7p NULL +-6583 cvLH6Eat2yFsyy7p NULL +-6922 cvLH6Eat2yFsyy7p NULL +-7183 cvLH6Eat2yFsyy7p NULL +-7353 cvLH6Eat2yFsyy7p NULL +-7449 cvLH6Eat2yFsyy7p NULL -75 cvLH6Eat2yFsyy7p NULL +-7735 cvLH6Eat2yFsyy7p NULL +-7824 cvLH6Eat2yFsyy7p NULL +-7964 cvLH6Eat2yFsyy7p NULL +-7984 cvLH6Eat2yFsyy7p NULL +-814 cvLH6Eat2yFsyy7p NULL +-9566 cvLH6Eat2yFsyy7p NULL 10 cvLH6Eat2yFsyy7p NULL -37 cvLH6Eat2yFsyy7p NULL -77 cvLH6Eat2yFsyy7p NULL -163 cvLH6Eat2yFsyy7p NULL -486 cvLH6Eat2yFsyy7p NULL -834 cvLH6Eat2yFsyy7p NULL +10688 cvLH6Eat2yFsyy7p NULL +11242 cvLH6Eat2yFsyy7p NULL +11737 cvLH6Eat2yFsyy7p NULL 1206 cvLH6Eat2yFsyy7p NULL +12161 cvLH6Eat2yFsyy7p NULL +12422 cvLH6Eat2yFsyy7p NULL +13026 cvLH6Eat2yFsyy7p NULL +13300 cvLH6Eat2yFsyy7p NULL 1345 cvLH6Eat2yFsyy7p NULL 1358 cvLH6Eat2yFsyy7p NULL +14072 cvLH6Eat2yFsyy7p NULL +14625 cvLH6Eat2yFsyy7p NULL 1475 cvLH6Eat2yFsyy7p NULL +14907 cvLH6Eat2yFsyy7p NULL +15007 cvLH6Eat2yFsyy7p NULL +15626 cvLH6Eat2yFsyy7p NULL +163 cvLH6Eat2yFsyy7p NULL 1639 cvLH6Eat2yFsyy7p NULL 2541 cvLH6Eat2yFsyy7p NULL 2617 cvLH6Eat2yFsyy7p NULL 3168 cvLH6Eat2yFsyy7p NULL +37 cvLH6Eat2yFsyy7p NULL 3856 cvLH6Eat2yFsyy7p NULL 3961 cvLH6Eat2yFsyy7p NULL 4181 cvLH6Eat2yFsyy7p NULL 4432 cvLH6Eat2yFsyy7p NULL 4587 cvLH6Eat2yFsyy7p NULL +486 cvLH6Eat2yFsyy7p NULL 4963 cvLH6Eat2yFsyy7p NULL 5521 cvLH6Eat2yFsyy7p NULL 5780 cvLH6Eat2yFsyy7p NULL @@ -1051,27 +1061,17 @@ POSTHOOK: Input: default@text_llap100 7021 cvLH6Eat2yFsyy7p NULL 7350 cvLH6Eat2yFsyy7p NULL 7476 cvLH6Eat2yFsyy7p NULL +77 cvLH6Eat2yFsyy7p NULL 7860 cvLH6Eat2yFsyy7p NULL 7952 cvLH6Eat2yFsyy7p NULL 8035 cvLH6Eat2yFsyy7p NULL +834 cvLH6Eat2yFsyy7p NULL 8353 cvLH6Eat2yFsyy7p NULL 8402 cvLH6Eat2yFsyy7p NULL 8499 cvLH6Eat2yFsyy7p NULL 9169 cvLH6Eat2yFsyy7p NULL 9366 cvLH6Eat2yFsyy7p NULL 9472 cvLH6Eat2yFsyy7p NULL -10688 cvLH6Eat2yFsyy7p NULL -11242 cvLH6Eat2yFsyy7p NULL -11737 cvLH6Eat2yFsyy7p NULL -12161 cvLH6Eat2yFsyy7p NULL -12422 cvLH6Eat2yFsyy7p NULL -13026 cvLH6Eat2yFsyy7p NULL -13300 cvLH6Eat2yFsyy7p NULL -14072 cvLH6Eat2yFsyy7p NULL -14625 cvLH6Eat2yFsyy7p NULL -14907 cvLH6Eat2yFsyy7p NULL -15007 cvLH6Eat2yFsyy7p NULL -15626 cvLH6Eat2yFsyy7p NULL PREHOOK: query: DROP TABLE text_llap PREHOOK: type: DROPTABLE PREHOOK: Input: default@text_llap diff --git ql/src/test/results/clientpositive/spark/vector_between_in.q.out ql/src/test/results/clientpositive/spark/vector_between_in.q.out index 07af2a3..4de2eb6 100644 --- 
ql/src/test/results/clientpositive/spark/vector_between_in.q.out +++ ql/src/test/results/clientpositive/spark/vector_between_in.q.out @@ -12,10 +12,14 @@ POSTHOOK: Lineage: decimal_date_test.cdate EXPRESSION [(alltypesorc)alltypesorc. POSTHOOK: Lineage: decimal_date_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_date_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_date_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] -PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -32,27 +36,65 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnInList(col 3, values [-67, -171]) -> boolean predicate: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce 
Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -65,10 +107,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -85,31 +131,77 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsFalse(col 4)(children: LongColumnInList(col 3, values [-67, -171, 20]) -> 4:boolean) -> boolean predicate: (not (cdate) IN (1969-10-26, 1969-07-14, 1970-01-21)) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -122,10 +214,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -142,27 +238,65 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> boolean predicate: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdecimal1 (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(20,10)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: 
vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -175,10 +309,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -195,31 +333,77 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsFalse(col 4)(children: DecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 4:boolean) -> boolean predicate: (not (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568)) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID 
UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -232,10 +416,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -252,27 +440,65 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnBetween(col 3, left -2, right 1) -> boolean predicate: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean) Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -285,10 +511,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -305,27 +535,65 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnNotBetween(col 3, left -610, right 608) -> boolean predicate: cdate NOT BETWEEN 1968-05-01 AND 1971-09-01 (type: boolean) Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + 
native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -338,10 +606,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -358,27 +630,65 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnBetween(col 1, left -20, right 45.9918918919) -> boolean predicate: cdecimal1 BETWEEN -20 AND 45.9918918919 (type: boolean) Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdecimal1 (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(20,10)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -391,10 +701,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -411,31 +725,77 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnNotBetween(col 1, left -2000, right 4390.1351351351) -> boolean predicate: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean) Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce 
Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -693,10 +1053,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_date_test #### A masked pattern was here #### 6172 -PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -714,12 +1078,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + selectExpressions: LongColumnInList(col 3, values [-67, -171]) -> 4:boolean Statistics: Num rows: 12288 Data size: 2467616 Basic stats: 
COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 4 + native: false + projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -728,14 +1107,41 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 @@ -743,17 +1149,36 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column 
stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -766,10 +1191,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -787,12 +1216,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + selectExpressions: DecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 4:boolean Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 4 + native: false + projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -801,14 +1245,41 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: 
count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 @@ -816,17 +1287,36 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -839,10 +1329,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -860,12 +1354,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + selectExpressions: VectorUDFAdaptor(cdate BETWEEN 1969-12-30 AND 1970-01-02) -> 4:boolean Statistics: Num 
rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count(1)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 4
+                          native: false
+                          projectedOutputColumns: [0]
                       keys: _col0 (type: boolean)
                       mode: hash
                       outputColumnNames: _col0, _col1
@@ -874,14 +1383,41 @@ STAGE PLANS:
                         key expressions: _col0 (type: boolean)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: boolean)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 1) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: [0]
                 keys: KEY._col0 (type: boolean)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
@@ -889,17 +1425,36 @@ STAGE PLANS:
                 Reduce Output Operator
                   key expressions: _col0 (type: boolean)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
                   Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: bigint)
         Reducer 3 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint)
                 outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1]
                 Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
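Note the contrast with the filter plans earlier in this file: when BETWEEN is evaluated in the SELECT list to produce a boolean value rather than in a WHERE clause, no dedicated vectorized projection expression is available here, so the planner wraps the row-mode UDF in VectorUDFAdaptor(cdate BETWEEN 1969-12-30 AND 1970-01-02) and the map stage reports usesVectorUDFAdaptor: true. The adaptor keeps the batch pipeline intact but evaluates the wrapped expression once per row, roughly as in this simplified sketch (names are illustrative, not the adaptor's actual internals):

    // Simplified sketch of an adaptor-style fallback: a row-mode predicate is
    // invoked per row to fill a 0/1 boolean output vector. Hypothetical names.
    import java.util.function.LongPredicate;

    public final class RowModeAdaptorSketch {
      public static void projectBoolean(long[] inputVector, long[] outputVector,
                                        int size, LongPredicate rowModeUdf) {
        for (int i = 0; i < size; i++) {
          // One call per row: correct, but slower than the tight loops
          // of a dedicated vectorized expression.
          outputVector[i] = rowModeUdf.test(inputVector[i]) ? 1L : 0L;
        }
      }
    }

The same fallback appears in the next plan for cdecimal1 NOT BETWEEN in projection context.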
@@ -912,10 +1467,14 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -933,12 +1492,27 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_date_test
                   Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                   Select Operator
                     expressions: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean)
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [4]
+                        selectExpressions: VectorUDFAdaptor(cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351) -> 4:boolean
                     Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count(1)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 4
+                          native: false
+                          projectedOutputColumns: [0]
                       keys: _col0 (type: boolean)
                       mode: hash
                       outputColumnNames: _col0, _col1
@@ -947,14 +1521,41 @@ STAGE PLANS:
                         key expressions: _col0 (type: boolean)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: boolean)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: 
VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 @@ -962,17 +1563,36 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out index e11af12..3909ee3 100644 --- ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out @@ -95,20 +95,24 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` FROM over1korc GROUP BY i ORDER BY i LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` FROM over1korc GROUP BY i ORDER BY i LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -126,12 +130,27 @@ STAGE PLANS: TableScan alias: over1korc Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + 
projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Select Operator expressions: i (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(50), avg(50.0), avg(50) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct, VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 2 + native: false + projectedOutputColumns: [0, 1, 2] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct output type STRUCT requires PRIMITIVE IS false keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -144,7 +163,20 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) @@ -160,16 +192,33 @@ STAGE PLANS: value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git 
ql/src/test/results/clientpositive/spark/vector_char_4.q.out ql/src/test/results/clientpositive/spark/vector_char_4.q.out index 3e551bb..943a4b1 100644 --- ql/src/test/results/clientpositive/spark/vector_char_4.q.out +++ ql/src/test/results/clientpositive/spark/vector_char_4.q.out @@ -121,12 +121,16 @@ POSTHOOK: query: create table char_lazy_binary_columnar(ct char(10), csi char(10 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@char_lazy_binary_columnar -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -142,12 +146,23 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] + selectExpressions: CastLongToChar(col 0, maxLength 10) -> 13:Char, CastLongToChar(col 1, maxLength 10) -> 14:Char, CastLongToChar(col 2, maxLength 20) -> 15:Char, CastLongToChar(col 3, maxLength 30) -> 16:Char, VectorUDFAdaptor(CAST( f AS CHAR(20)) -> 17:char(20), VectorUDFAdaptor(CAST( d AS CHAR(20)) -> 18:char(20), CastStringGroupToChar(col 8, maxLength 50) -> 19:Char Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -155,6 +170,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.char_lazy_binary_columnar Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out index 5100fc2..35dc757 100644 --- ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out +++ ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out @@ -1225,12 +1225,16 @@ POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_sold_time_sk SIMPLE POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_warehouse_sk SIMPLE 
[(web_sales_txt)web_sales_txt.FieldSchema(name:ws_warehouse_sk, type:int, comment:null), ] POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_web_page_sk SIMPLE [(web_sales_txt)web_sales_txt.FieldSchema(name:ws_web_page_sk, type:int, comment:null), ] POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_wholesale_cost SIMPLE [(web_sales_txt)web_sales_txt.FieldSchema(name:ws_wholesale_cost, type:decimal(7,2), comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(distinct ws_order_number) from web_sales PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(distinct ws_order_number) from web_sales POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1248,11 +1252,24 @@ STAGE PLANS: TableScan alias: web_sales Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] Select Operator expressions: ws_order_number (type: int) outputColumnNames: ws_order_number + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [16] Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 16 + native: false + projectedOutputColumns: [] keys: ws_order_number (type: int) mode: hash outputColumnNames: _col0 @@ -1261,35 +1278,88 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 1752000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0) -> bigint + className: 
VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_data_types.q.out ql/src/test/results/clientpositive/spark/vector_data_types.q.out index 182dad1..10c111f 100644 --- ql/src/test/results/clientpositive/spark/vector_data_types.q.out +++ ql/src/test/results/clientpositive/spark/vector_data_types.q.out @@ -95,10 +95,14 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -187,10 +191,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@over1korc #### A masked pattern was here #### -17045922556 -PREHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM 
over1korc ORDER BY t, si, i LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -207,29 +215,66 @@ STAGE PLANS: TableScan alias: over1korc Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: boolean), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: decimal(4,2)), VALUE._col7 (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 + Limit 
Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out index a36efc2..3496e99 100644 --- ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out @@ -20,20 +20,24 @@ POSTHOOK: Lineage: decimal_vgby.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.F POSTHOOK: Lineage: decimal_vgby.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_vgby.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_vgby.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: EXPLAIN SELECT cint, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) FROM decimal_vgby GROUP BY cint HAVING COUNT(*) > 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) FROM decimal_vgby GROUP BY cint HAVING COUNT(*) > 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -50,12 +54,26 @@ STAGE PLANS: TableScan alias: decimal_vgby Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) outputColumnNames: cint, cdecimal1, cdecimal2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 1, 2] Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), count() + Group By Vectorization: + aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 3 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 @@ -64,27 +82,65 @@ STAGE PLANS: key expressions: _col0 
(type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)), _col9 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), max(VALUE._col5), min(VALUE._col6), sum(VALUE._col7), count(VALUE._col8) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(20,10), VectorUDAFMinDecimal(col 3) -> decimal(20,10), VectorUDAFSumDecimal(col 4) -> decimal(38,18), VectorUDAFCountMerge(col 5) -> bigint, VectorUDAFMaxDecimal(col 6) -> decimal(23,14), VectorUDAFMinDecimal(col 7) -> decimal(23,14), VectorUDAFSumDecimal(col 8) -> decimal(38,18), VectorUDAFCountMerge(col 9) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 9, val 1) -> boolean predicate: (_col9 > 1) (type: boolean) Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column 
stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -123,20 +179,24 @@ POSTHOOK: Input: default@decimal_vgby 6981 3 5831542.2692483780 -515.6210729730 5830511.0271024320 3 6984454.21109769200000 -617.56077692307690 6983219.08954384584620 762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2 6984454.21109769200000 1833.94569230769250 6986288.15678999969250 NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360 -PREHOOK: query: EXPLAIN SELECT cint, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) FROM decimal_vgby GROUP BY cint HAVING COUNT(*) > 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) FROM decimal_vgby GROUP BY cint HAVING COUNT(*) > 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -153,12 +213,27 @@ STAGE PLANS: TableScan alias: decimal_vgby Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) outputColumnNames: cint, cdecimal1, cdecimal2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 1, 2] Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count() + Group By Vectorization: + aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFAvgDecimal(col 1) -> struct, VectorUDAFStdPopDecimal(col 1) -> struct, VectorUDAFStdSampDecimal(col 1) -> struct, VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFAvgDecimal(col 2) -> struct, VectorUDAFStdPopDecimal(col 2) -> struct, VectorUDAFStdSampDecimal(col 2) -> struct, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 3 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS 
false, Vector output of VectorUDAFStdSampDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 @@ -170,7 +245,20 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col4] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14) diff --git ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out index a205a2f..e4c92c7 100644 --- ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out @@ -72,12 +72,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k POSTHOOK: Output: default@t2 POSTHOOK: Lineage: t2.dec EXPRESSION [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -93,18 +97,40 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1)(children: 
CastDecimalToBoolean(col 0) -> 1:Boolean) -> boolean predicate: dec is not null (type: boolean) Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dec (type: decimal(4,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: decimal(6,2)) 1 _col0 (type: decimal(6,2)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -117,12 +143,23 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1)(children: CastDecimalToBoolean(col 0) -> 1:Boolean) -> boolean predicate: dec is not null (type: boolean) Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dec (type: decimal(4,2)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -130,18 +167,35 @@ STAGE PLANS: keys: 0 _col0 (type: decimal(6,2)) 1 _col0 (type: decimal(6,2)) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true + nativeConditionsNotMet: Optimized Table and Supports Key Types IS false + nativeNotSupportedKeyTypes: DECIMAL outputColumnNames: _col0, _col1 input vertices: 1 Map 2 Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out index e17c00b..4a2783a 100644 --- ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out +++ 
ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select distinct s, t from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select distinct s, t from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -123,11 +127,24 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: t, s + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 8] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 8 + native: false + projectedOutputColumns: [] keys: t (type: tinyint), s (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -136,12 +153,38 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 @@ -149,9 +192,16 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: tinyint) outputColumnNames: 
_col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_elt.q.out ql/src/test/results/clientpositive/spark/vector_elt.q.out index bb66867..b49462a 100644 --- ql/src/test/results/clientpositive/spark/vector_elt.q.out +++ ql/src/test/results/clientpositive/spark/vector_elt.q.out @@ -1,29 +1,79 @@ -PREHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc WHERE ctinyint > 0 LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc WHERE ctinyint > 0 LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean + predicate: (ctinyint > 0) (type: boolean) + Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [13, 6, 2, 16] + selectExpressions: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 13:long, VectorElt(columns [14, 6, 15])(children: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 14:long, col 6, CastLongToString(col 2) -> 15:String) -> 16:string + Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: (ctinyint > 0) (type: boolean) - Select Operator - expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Limit - Number of rows: 10 - ListSink + ListSink PREHOOK: query: SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc @@ -47,7 +97,7 @@ POSTHOOK: Input: default@alltypesorc 1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 2 cvLH6Eat2yFsyy7p 528534767 528534767 1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), elt('1', 'abc', 'defg'), @@ -60,7 +110,7 @@ SELECT elt(2, 'abc', 'defg'), elt(3, 'abc', 'defg') FROM alltypesorc LIMIT 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), elt('1', 'abc', 'defg'), @@ -73,22 +123,67 @@ SELECT elt(2, 'abc', 'defg'), elt(3, 'abc', 'defg') FROM alltypesorc LIMIT 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Select Operator + expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: string), null (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + selectExpressions: ConstantVectorExpression(val defg) -> 12:string, ConstantVectorExpression(val cc) -> 13:string, ConstantVectorExpression(val abc) -> 14:string, ConstantVectorExpression(val 2) -> 15:string, ConstantVectorExpression(val 12345) -> 16:string, ConstantVectorExpression(val 123456789012) -> 17:string, ConstantVectorExpression(val 1.25) -> 18:string, ConstantVectorExpression(val 16.0) -> 19:string, ConstantVectorExpression(val null) -> 20:string, ConstantVectorExpression(val null) -> 21:string + Statistics: Num rows: 12288 Data size: 8687784 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num 
rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: - TableScan - alias: alltypesorc - Select Operator - expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: string), null (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Limit - Number of rows: 1 - ListSink + ListSink PREHOOK: query: SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), diff --git ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out index 40b6877..1686064 100644 --- ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s, t, max(b) from vectortab2korc group by s, t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select s, t, max(b) from vectortab2korc group by s, t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -123,12 +127,26 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: t (type: tinyint), s (type: string), b (type: bigint) outputColumnNames: t, s, b + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 8, 3] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(b) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 8 + native: false + projectedOutputColumns: [0] keys: t (type: tinyint), s (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 
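-- The Group By Vectorization entries above come from the EXPLAIN VECTORIZATION output this
-- patch introduces. A minimal sketch of reproducing the same vectorized MAX aggregation plan,
-- assuming the vectortab2korc ORC table created earlier in this test and the settings named
-- in the enabled-conditions fields of this output:
--
--   set hive.vectorized.execution.enabled=true;
--   set hive.vectorized.execution.reduce.enabled=true;
--
--   explain vectorization expression
--   select s, t, max(b) from vectortab2korc group by s, t;
--
-- The EXPRESSION level adds per-operator expression detail (e.g. VectorUDAFMaxLong(col 3));
-- the DETAIL level used elsewhere in this patch additionally prints the rowBatchContext.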
@@ -137,14 +155,41 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -152,9 +197,16 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: tinyint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_inner_join.q.out ql/src/test/results/clientpositive/spark/vector_inner_join.q.out index a1c1d4d..3a9f97b 100644 --- ql/src/test/results/clientpositive/spark/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/spark/vector_inner_join.q.out @@ -28,12 +28,16 @@ POSTHOOK: query: insert into table orc_table_2a values(0),(2), (3),(null),(4) POSTHOOK: type: QUERY POSTHOOK: Output: default@orc_table_2a POSTHOOK: Lineage: orc_table_2a.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -49,18 
+53,45 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: c:int + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -73,12 +104,23 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -86,6 +128,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0] outputColumnNames: _col1 input vertices: 0 Map 1 @@ -93,15 +143,35 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: a:int + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -122,12 +192,16 @@ POSTHOOK: Input: default@orc_table_1a POSTHOOK: Input: default@orc_table_2a #### A masked pattern was here #### 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -143,23 +217,56 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: a:int + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -172,12 +279,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -185,18 +303,42 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 
_col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0] outputColumnNames: _col0 input vertices: 1 Map 2 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: c:int + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -249,12 +391,16 @@ POSTHOOK: type: QUERY POSTHOOK: Output: default@orc_table_2b POSTHOOK: Lineage: orc_table_2b.c EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: orc_table_2b.v2 SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -270,18 +416,45 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col1 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -294,12 +467,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -307,6 +491,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [2, 0] + smallTableMapping: [2] outputColumnNames: _col1, _col2 input vertices: 1 Map 2 @@ -314,15 +506,36 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 0] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: string Local Work: Map Reduce Local Work @@ -343,12 +556,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled 
IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -364,18 +581,45 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -388,12 +632,23 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -401,18 +656,44 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [1] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0, 1, 1, 2] + smallTableMapping: [2] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 + scratchColumnTypeNames: string Local Work: Map Reduce Local Work @@ -433,12 +714,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 3 3 THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -454,18 +739,45 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col1 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -478,12 +790,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -491,6 +814,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + 
bigTableValueColumns: [0, 1] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0, 1, 2, 0] + smallTableMapping: [2] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -498,15 +830,37 @@ STAGE PLANS: Select Operator expressions: _col2 (type: string), (_col3 * 2) (type: int), (_col0 * 5) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 1] + selectExpressions: LongColMultiplyLongScalar(col 0, val 2) -> 3:long, LongColMultiplyLongScalar(col 0, val 5) -> 4:long Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: string, bigint, bigint Local Work: Map Reduce Local Work @@ -527,12 +881,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 6 15 THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -548,18 +906,45 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: 
VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col1 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -572,12 +957,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -585,6 +981,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0, 1, 2] + smallTableMapping: [2] outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 @@ -592,15 +997,36 @@ STAGE PLANS: Select Operator expressions: _col2 (type: string), _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 1, 0] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: string Local Work: Map Reduce Local Work @@ -621,12 +1047,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three THREE 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from 
orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -642,18 +1072,45 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col1 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -666,12 +1123,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -679,6 +1147,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [1] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [1, 2, 0] + smallTableMapping: [2] outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 2 @@ -686,15 +1163,36 @@ STAGE PLANS: Select Operator expressions: _col3 (type: int), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: 
+ className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: string Local Work: Map Reduce Local Work @@ -715,12 +1213,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### 3 three THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -736,18 +1238,45 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -760,12 +1289,23 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> 
boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -773,6 +1313,15 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [1] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0, 1, 2] + smallTableMapping: [2] outputColumnNames: _col0, _col2, _col3 input vertices: 1 Map 2 @@ -780,15 +1329,36 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col3 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 + scratchColumnTypeNames: string Local Work: Map Reduce Local Work @@ -809,12 +1379,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three THREE 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -830,18 +1404,45 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: 
COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -854,12 +1455,23 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -867,6 +1479,15 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [1] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0, 1, 2] + smallTableMapping: [2] outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 2 @@ -874,15 +1495,36 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int), _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: 
[0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 + scratchColumnTypeNames: string Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out index c08fbda..91af229 100644 --- ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization select count(*) from (select c.ctinyint from alltypesorc c left outer join alltypesorc cd @@ -7,7 +7,7 @@ left outer join alltypesorc hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select count(*) from (select c.ctinyint from alltypesorc c left outer join alltypesorc cd @@ -16,6 +16,10 @@ left outer join alltypesorc hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -40,6 +44,14 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Map 4 @@ -56,6 +68,14 @@ STAGE PLANS: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -103,10 +123,25 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index c84363f..5b3ee56 100644 --- ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -1,15 +1,19 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') PREHOOK: type: 
QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -38,6 +42,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Local Work: Map Reduce Local Work Map 4 @@ -53,6 +61,10 @@ STAGE PLANS: outputColumnNames: l_orderkey Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: l_orderkey (type: int) mode: hash outputColumnNames: _col0 @@ -62,17 +74,37 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 5 Execution mode: vectorized Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) @@ -92,6 +124,10 @@ STAGE PLANS: predicate: l_partkey is not null (type: boolean) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: l_partkey (type: int) mode: hash outputColumnNames: _col0 @@ -101,12 +137,29 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: vectorized Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + 
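
The predicateExpression entries in these plans (for example FilterLongColGreaterLongScalar(col 0, val 2)) name generated classes that evaluate a single comparison across an entire batch of rows at once. A minimal sketch of that evaluation style, using plain arrays instead of Hive's column-vector classes — class and method names here are illustrative only, and null/isRepeating handling is omitted:

    // Simplified model of a batch filter such as
    // FilterLongColGreaterLongScalar(col 0, val 2): compare one long column
    // against a scalar and compact the batch's selected-row list in place.
    public class FilterLongColGreaterLongScalarSketch {

        public static void main(String[] args) {
            long[] col0 = {1, 2, 3, 4, 5};    // column values for one batch
            int[] selected = {0, 1, 2, 3, 4}; // rows still alive in the batch
            int size = filter(col0, 2L, selected, col0.length);
            for (int i = 0; i < size; i++) {
                System.out.println("row " + selected[i] + " -> " + col0[selected[i]]);
            }
            // Keeps rows 2, 3, 4 (values 3, 4, 5), matching predicate (c > 2).
        }

        // Keep only rows where vector[row] > scalar; returns the new batch size.
        static int filter(long[] vector, long scalar, int[] selected, int size) {
            int newSize = 0;
            for (int j = 0; j < size; j++) {
                int row = selected[j];
                if (vector[row] > scalar) {
                    selected[newSize++] = row;
                }
            }
            return newSize;
        }
    }

Compacting the selected array is what lets downstream operators in the same plan skip filtered rows without copying any column data.
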
native: false + projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 @@ -117,6 +170,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 3 @@ -127,6 +184,10 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col3 input vertices: 1 Reducer 5 @@ -134,9 +195,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2] Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -173,18 +241,22 @@ POSTHOOK: Input: default@lineitem 61336 8855 64128 9141 82704 7721 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-3, Stage-4 @@ -213,6 +285,10 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Local Work: Map Reduce Local Work @@ -239,6 +315,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Local Work: Map Reduce Local Work Map 7 @@ -264,6 +344,10 @@ STAGE PLANS: 0 Reducer 6 Statistics: Num rows: 110 Data size: 13198 Basic stats: COMPLETE Column stats: NONE Group 
By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: _col2 (type: int) mode: hash outputColumnNames: _col0 @@ -273,14 +357,31 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 110 Data size: 13198 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Local Work: Map Reduce Local Work Reducer 8 Execution mode: vectorized Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 @@ -291,11 +392,21 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col3 input vertices: 0 Map 4 Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1, col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int), _col3 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -304,18 +415,39 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE Reducer 9 Execution mode: vectorized Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 30 Data size: 3629 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + 
native: true keys: 0 _col1 (type: int), _col4 (type: int) 1 _col0 (type: int), _col1 (type: int) @@ -335,6 +467,10 @@ STAGE PLANS: predicate: l_partkey is not null (type: boolean) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: l_partkey (type: int) mode: hash outputColumnNames: _col0 @@ -344,12 +480,29 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: vectorized Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 @@ -360,6 +513,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1, _col3, _col4 input vertices: 1 Map 3 @@ -370,6 +527,10 @@ STAGE PLANS: keys: 0 _col1 (type: int), _col4 (type: int) 1 _col0 (type: int), _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerBigOnlyMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col3 input vertices: 1 Reducer 9 @@ -377,9 +538,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2] Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -401,6 +569,10 @@ STAGE PLANS: predicate: l_partkey is not null (type: boolean) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: l_partkey (type: int) mode: hash outputColumnNames: _col0 @@ -410,17 +582,37 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) 
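
The Map Vectorization and Reduce Vectorization summaries in these plans report each decision as a list of "<condition> IS true/false" entries, with enabledConditionsNotMet explaining why a node (here, a TextInputFormat scan of lineitem) falls back to row mode. A small sketch of how such a report could be assembled — a hypothetical helper, not Hive's actual internals:

    import java.util.ArrayList;
    import java.util.List;

    // Collect named boolean conditions and render them in the
    // "<name> IS true/false" style used by the explain output above.
    public class ConditionReport {

        private final List<String> met = new ArrayList<>();
        private final List<String> notMet = new ArrayList<>();

        // Record one condition; it lands in the met or notMet bucket.
        public void check(String name, boolean value) {
            (value ? met : notMet).add(name + " IS " + value);
        }

        public boolean allMet() {
            return notMet.isEmpty();
        }

        public String render() {
            StringBuilder sb = new StringBuilder();
            if (!met.isEmpty()) {
                sb.append("enabledConditionsMet: ").append(String.join(", ", met));
            }
            if (!notMet.isEmpty()) {
                if (sb.length() > 0) {
                    sb.append('\n');
                }
                sb.append("enabledConditionsNotMet: ").append(String.join(", ", notMet));
            }
            return sb.toString();
        }

        public static void main(String[] args) {
            ConditionReport report = new ConditionReport();
            // Mirrors the TextInputFormat case shown in this file.
            report.check("hive.vectorized.use.vector.serde.deserialize", false);
            System.out.println(report.render());
            // Prints:
            // enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
        }
    }

Keeping the met and not-met lists separate makes the fallback reason visible directly in the plan, as in the lineitem scans above.
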
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 6 Execution mode: vectorized Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out index 4710a73..cbf7d03 100644 --- ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select bo, max(b) from vectortab2korc group by bo order by bo desc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select bo, max(b) from vectortab2korc group by bo order by bo desc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -124,12 +128,26 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: bo (type: boolean), b (type: bigint) outputColumnNames: bo, b + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [7, 3] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(b) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 7 + native: false + projectedOutputColumns: [0] keys: bo (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -138,14 +156,41 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 @@ -153,17 +198,36 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: boolean) sort order: - + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out index 8e695c6..c664e8f 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out @@ -58,12 +58,16 @@ POSTHOOK: Input: default@orc_table_2 4 FOUR NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = 
t2.c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -79,15 +83,38 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -100,9 +127,16 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -110,18 +144,45 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [1] + bigTableOuterKeyMapping: 1 -> 2 + bigTableRetainedColumns: [0, 1, 2] + bigTableValueColumns: [0, 1] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0, 1, 2, 3] + smallTableMapping: [3] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, string Local Work: Map Reduce Local Work @@ -147,12 +208,16 @@ one 1 NULL NULL one 1 NULL NULL three 3 3 THREE two 2 2 TWO -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -168,15 +233,38 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -189,9 +277,16 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -199,18 +294,45 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableOuterKeyMapping: 0 -> 3 + bigTableRetainedColumns: [0, 1, 3] + bigTableValueColumns: [0, 1] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [2, 3, 0, 1] + smallTableMapping: [2] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: string, bigint Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out index b5ebd24..ca4ce15 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out @@ -214,18 +214,22 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -241,15 +245,38 @@ STAGE PLANS: TableScan alias: cd Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col2 (type: int) 1 _col2 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + dataColumns: ctinyint:tinyint, csmallint:smallint, 
cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -262,9 +289,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -272,18 +306,45 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col2 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [2] + bigTableOuterKeyMapping: 2 -> 14 + bigTableRetainedColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 14] + bigTableValueColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] + smallTableMapping: [12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 input vertices: 1 Map 2 Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint, bigint, double, double, string, string, timestamp, timestamp, bigint, bigint Local Work: Map Reduce Local Work @@ -326,18 +387,22 
@@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -353,15 +418,38 @@ STAGE PLANS: TableScan alias: hd Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -374,9 +462,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -384,18 +479,42 @@ STAGE PLANS: keys: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinOuterLongOperator + native: true + 
nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0] outputColumnNames: _col0 input vertices: 1 Map 2 Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -524,7 +643,7 @@ NULL NULL NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -533,7 +652,7 @@ left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -542,6 +661,10 @@ left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -557,15 +680,38 @@ STAGE PLANS: TableScan alias: cd Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, 
cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Local Work: Map Reduce Local Work Map 4 @@ -573,15 +719,38 @@ STAGE PLANS: TableScan alias: hd Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -596,9 +765,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint), cint (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2] Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -606,6 +782,14 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [2] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0] outputColumnNames: _col0 input vertices: 1 Map 3 @@ -616,32 +800,84 @@ STAGE PLANS: keys: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [0] outputColumnNames: _col0 input vertices: 1 Map 4 Statistics: Num rows: 17 Data size: 4843 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), sum(_col0) + Group By Vectorization: + aggregators: 
VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Local Work: Map Reduce Local Work Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out index 4262aa1..8a9f90f 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out @@ -224,7 +224,7 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(*), 
sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -233,7 +233,7 @@ left outer join small_alltypesorc_a hd on hd.cbigint = c.cbigint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -242,6 +242,10 @@ left outer join small_alltypesorc_a hd on hd.cbigint = c.cbigint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -257,15 +261,38 @@ STAGE PLANS: TableScan alias: cd Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Local Work: Map Reduce Local Work Map 4 @@ -273,15 +300,38 @@ STAGE PLANS: TableScan alias: hd Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cbigint (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [3] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -296,9 +346,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE + 
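
With "explain vectorization detail", each vectorized map task additionally prints a rowBatchContext block (dataColumnCount, includeColumns, dataColumns, partitionColumnCount, scratchColumnTypeNames) describing the shape of the batches it will process. A rough sketch of a holder for that information — a hypothetical class, not Hive's actual row-batch context:

    import java.util.Arrays;

    // Illustrative holder for the fields shown under "rowBatchContext":
    // which physical columns a scan reads, their declared types, and any
    // scratch columns added for intermediate results.
    public class RowBatchContextInfo {

        final String[] dataColumns;    // "name:type" for each table column
        final int[] includeColumns;    // subset of columns actually read
        final int partitionColumnCount;
        final String[] scratchTypes;   // types of scratch columns, if any

        RowBatchContextInfo(String[] dataColumns, int[] includeColumns,
            int partitionColumnCount, String[] scratchTypes) {
            this.dataColumns = dataColumns;
            this.includeColumns = includeColumns;
            this.partitionColumnCount = partitionColumnCount;
            this.scratchTypes = scratchTypes;
        }

        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            sb.append("dataColumnCount: ").append(dataColumns.length).append('\n');
            sb.append("includeColumns: ").append(Arrays.toString(includeColumns)).append('\n');
            sb.append("dataColumns: ").append(String.join(", ", dataColumns)).append('\n');
            sb.append("partitionColumnCount: ").append(partitionColumnCount);
            if (scratchTypes.length > 0) {
                sb.append('\n').append("scratchColumnTypeNames: ")
                  .append(String.join(", ", scratchTypes));
            }
            return sb.toString();
        }

        public static void main(String[] args) {
            // Mirrors one of the ORC scans in these plans: two columns, both
            // read, no partition columns, one string scratch column.
            System.out.println(new RowBatchContextInfo(
                new String[] {"c:int", "v2:string"},
                new int[] {0, 1},
                0,
                new String[] {"string"}));
        }
    }

Running main prints the same fields reported for the ORC scans in this file; the scratch column list grows when operators such as outer map joins need extra output columns beyond the table's own.
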
TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cint (type: int), cbigint (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3] Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -306,6 +363,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [2] + bigTableRetainedColumns: [3] + bigTableValueColumns: [3] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [3] outputColumnNames: _col1 input vertices: 1 Map 3 @@ -316,32 +381,84 @@ STAGE PLANS: keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: [3] + bigTableRetainedColumns: [3] + bigTableValueColumns: [3] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumns: [3] outputColumnNames: _col1 input vertices: 1 Map 4 Statistics: Num rows: 24 Data size: 6336 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2, 3] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Local Work: Map Reduce Local Work Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out index 30a0eee..dbbfd34 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out @@ -224,7 +224,7 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -233,7 +233,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -242,117 +242,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark #### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cstring1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - 
Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cstring1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 22 Data size: 5743 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 4 - Statistics: Num rows: 24 Data size: 6317 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -374,7 +264,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a #### A masked pattern was here #### 20 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -383,7 +273,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -392,117 +282,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cstring2 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 
20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cstring1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cstring1 (type: string), cstring2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 22 Data size: 5743 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 4 - Statistics: Num rows: 24 Data size: 6317 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -524,7 +304,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a #### A masked pattern was here #### 28 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -533,7 +313,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -542,117 +322,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num 
rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint), cstring2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col1 (type: bigint), _col3 (type: string) - 1 _col0 (type: bigint), _col1 (type: string) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cstring1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int), _col2 (type: string) - 1 _col0 (type: int), _col1 (type: string) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: bigint), _col3 (type: string) - 1 _col0 (type: bigint), _col1 (type: string) - outputColumnNames: _col0, _col2 - input vertices: - 1 Map 3 - Statistics: Num rows: 22 Data size: 5743 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int), _col2 (type: string) - 1 _col0 (type: int), _col1 (type: string) - input vertices: - 1 Map 4 - Statistics: Num rows: 24 Data size: 6317 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd diff --git ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out index da12cf4..ffce9e6 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out +++ 
ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out @@ -244,85 +244,19 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark #### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col2 (type: int) - 1 _col2 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 - input vertices: - 1 Map 2 - Statistics: Num rows: 33 Data size: 4825 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 33 Data size: 4825 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch 
Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -391,85 +325,19 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - outputColumnNames: _col0 - input vertices: - 1 Map 2 - Statistics: Num rows: 33 Data size: 4825 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 33 Data size: 4825 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd @@ -894,7 +762,7 @@ NULL NULL NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -903,7 +771,7 @@ left outer join 
small_alltypesorc_b hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -912,117 +780,7 @@ left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 33 Data size: 4825 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - input vertices: - 1 Map 4 - Statistics: Num rows: 36 Data size: 5307 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select c.ctinyint from 
small_alltypesorc_b c left outer join small_alltypesorc_b cd diff --git ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out index 55e9287..4f25253 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out @@ -62,105 +62,21 @@ POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table #### A masked pattern was here #### -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st on s.ctinyint = st.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st on s.ctinyint = st.ctinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark #### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: st - Statistics: Num rows: 100 Data size: 379 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 379 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 2229 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st @@ -180,111 +96,21 @@ POSTHOOK: Input: 
default@small_table POSTHOOK: Input: default@sorted_mod_4 #### A masked pattern was here #### 6876 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.cmodint = 2 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.cmodint = 2 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 379 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 379 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {(_col1 = 2)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col1 = 2)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 2229 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -304,111 +130,21 @@ POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 #### A masked pattern was here #### 6058 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from 
sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 379 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 379 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {((UDFToInteger(_col0) pmod 4) = _col1)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {((UDFToInteger(_col0) pmod 4) = _col1)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 2229 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -428,111 +164,21 @@ POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 #### A masked pattern was here #### 6248 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.ctinyint < 100 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: 
query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.ctinyint < 100 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark #### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 379 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 379 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {(_col0 < 100)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col0 < 100)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 2229 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -552,7 +198,7 @@ POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 #### A masked pattern was here #### 6876 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm @@ -561,7 +207,7 @@ left outer join sorted_mod_4 s2 on s2.ctinyint = s.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm @@ -570,117 +216,7 @@ left outer join sorted_mod_4 s2 on s2.ctinyint = s.ctinyint ) t1 POSTHOOK: type: QUERY 
-STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 379 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 379 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: s2 - Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 2229 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - input vertices: - 1 Map 4 - Statistics: Num rows: 7329 Data size: 2451 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm @@ -766,105 +302,21 @@ POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### -PREHOOK: query: explain +PREHOOK: query: explain vectorization 
detail formatted select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st on s.cmodtinyint = st.cmodtinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st on s.cmodtinyint = st.cmodtinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark #### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: st - Statistics: Num rows: 100 Data size: 362 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 362 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st @@ -884,111 +336,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 39112 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 ) t1 
POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 362 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 362 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {(_col1 = 2)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col1 = 2)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -1008,111 +370,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 11171 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked 
pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 362 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 362 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {((_col0 pmod 4) = _col1)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {((_col0 pmod 4) = _col1)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -1132,111 +404,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 14371 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark #### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 362 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 
cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 362 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {(_col0 < 3)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col0 < 3)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -1256,7 +438,7 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 17792 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm @@ -1265,7 +447,7 @@ left outer join mod_8_mod_4 s2 on s2.cmodtinyint = s.cmodtinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm @@ -1274,117 +456,7 @@ left outer join mod_8_mod_4 s2 on s2.cmodtinyint = s.cmodtinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 362 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 362 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) 
- Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: s2 - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 4 - Statistics: Num rows: 7329 Data size: 3335 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm diff --git ql/src/test/results/clientpositive/spark/vector_string_concat.q.out ql/src/test/results/clientpositive/spark/vector_string_concat.q.out index 6dec92a..12132ae 100644 --- ql/src/test/results/clientpositive/spark/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/spark/vector_string_concat.q.out @@ -95,32 +95,77 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT s AS `string`, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS 
`none_padded_str`, CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS `none_z_rtrim_str` FROM over1korc LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT s AS `string`, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS `none_z_rtrim_str` FROM over1korc LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1korc + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Select Operator + expressions: s (type: string), concat(concat(' ', s), ' ') (type: string), concat(concat('|', rtrim(concat(concat(' ', s), ' '))), '|') (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [7, 12, 11] + selectExpressions: StringGroupColConcatStringScalar(col 11, val )(children: StringScalarConcatStringGroupCol(val , col 7) -> 11:String_Family) -> 12:String_Family, StringGroupColConcatStringScalar(col 13, val |)(children: StringScalarConcatStringGroupCol(val |, col 11)(children: StringRTrim(col 13)(children: StringGroupColConcatStringScalar(col 11, val )(children: StringScalarConcatStringGroupCol(val , col 7) -> 11:String_Family) -> 13:String_Family) -> 11:String) -> 13:String_Family) -> 11:String_Family + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 20 Processor Tree: - TableScan - alias: over1korc - Select Operator - expressions: s (type: string), concat(concat(' ', s), ' ') (type: string), concat(concat('|', rtrim(concat(concat(' ', s), ' '))), '|') (type: string) - outputColumnNames: _col0, _col1, _col2 - Limit - Number of rows: 20 - ListSink + ListSink PREHOOK: query: SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, @@ -259,20 +304,24 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE 
[(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field` FROM vectortab2korc GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) ORDER BY `field` LIMIT 50 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field` FROM vectortab2korc GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) ORDER BY `field` LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -290,11 +339,25 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: concat(concat(concat('Quarter ', UDFToString(UDFToInteger(((UDFToDouble((month(dt) - 1)) / 3.0) + 1.0)))), '-'), UDFToString(year(dt))) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [19] + selectExpressions: StringGroupConcatColCol(col 17, col 18)(children: StringGroupColConcatStringScalar(col 18, val -)(children: StringScalarConcatStringGroupCol(val Quarter , col 17)(children: CastLongToString(col 13)(children: CastDoubleToLong(col 15)(children: DoubleColAddDoubleScalar(col 16, val 1.0)(children: DoubleColDivideDoubleScalar(col 15, val 3.0)(children: CastLongToDouble(col 14)(children: LongColSubtractLongScalar(col 13, val 1)(children: VectorUDFMonthDate(col 12, field MONTH) -> 13:long) -> 14:long) -> 15:double) -> 16:double) -> 15:double) -> 13:long) -> 17:String) -> 18:String_Family) -> 17:String_Family, CastLongToString(col 13)(children: VectorUDFYearDate(col 12, field YEAR) -> 13:long) -> 18:String) -> 19:String_Family Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 19 + native: false + projectedOutputColumns: [] keys: _col0 (type: string) mode: hash outputColumnNames: _col0 @@ -303,13 +366,39 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: 
vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 @@ -317,20 +406,42 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 50 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 50 Data size: 22950 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 22950 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out index 9a46ee1..1c8e479 100644 --- ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out +++ ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out @@ -121,12 +121,16 @@ POSTHOOK: query: create table varchar_lazy_binary_columnar(vt varchar(10), vsi v POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@varchar_lazy_binary_columnar -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -142,12 +146,23 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] + selectExpressions: CastLongToVarChar(col 0, maxLength 10) -> 13:VarChar, CastLongToVarChar(col 1, maxLength 10) -> 14:VarChar, CastLongToVarChar(col 2, maxLength 20) -> 15:VarChar, CastLongToVarChar(col 3, maxLength 30) -> 16:VarChar, VectorUDFAdaptor(CAST( f AS varchar(20))) -> 17:varchar(20), VectorUDFAdaptor(CAST( d AS varchar(20))) -> 18:varchar(20), CastStringGroupToVarChar(col 8, maxLength 50) -> 19:VarChar Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -155,6 +170,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.varchar_lazy_binary_columnar Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/vectorization_0.q.out ql/src/test/results/clientpositive/spark/vectorization_0.q.out index 357c135..5086f53 100644 --- ql/src/test/results/clientpositive/spark/vectorization_0.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_0.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ctinyint) as c1, MAX(ctinyint), COUNT(ctinyint), @@ -6,7 +6,7 @@ SELECT MIN(ctinyint) as c1, FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ctinyint) as c1, MAX(ctinyint), COUNT(ctinyint), @@ -14,6 +14,10 @@ SELECT MIN(ctinyint) as c1, FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -31,42 +35,100 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + 
native: true + projectedOutputColumns: [0] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ctinyint), max(ctinyint), count(ctinyint), count() + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFMaxLong(col 1) -> tinyint, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), 
VALUE._col0 (type: tinyint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -98,16 +160,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -64 62 9173 12288 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(ctinyint) as c1 FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(ctinyint) as c1 FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -125,41 +191,99 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial 
outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -185,7 +309,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -39856 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT avg(ctinyint) as c1, variance(ctinyint), @@ -198,7 +322,7 @@ SELECT FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT avg(ctinyint) as c1, variance(ctinyint), @@ -211,6 +335,10 @@ SELECT FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -242,7 +370,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) @@ -256,6 +397,13 @@ STAGE PLANS: value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 
(type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) @@ -304,7 +452,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -4.344925324321378 1158.3003004768184 1158.3003004768184 1158.4265870337827 34.033811136527426 34.033811136527426 34.033811136527426 34.03566639620536 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(cbigint) as c1, MAX(cbigint), COUNT(cbigint), @@ -312,7 +460,7 @@ SELECT MIN(cbigint) as c1, FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(cbigint) as c1, MAX(cbigint), COUNT(cbigint), @@ -320,6 +468,10 @@ SELECT MIN(cbigint) as c1, FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -337,42 +489,100 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cbigint (type: bigint) outputColumnNames: cbigint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(cbigint), max(cbigint), count(cbigint), count() + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 3) -> bigint, VectorUDAFMaxLong(col 3) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: 
false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> bigint, VectorUDAFMaxLong(col 1) -> bigint, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -404,16 +614,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -2147311592 2145498388 9173 12288 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(cbigint) as c1 FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(cbigint) as c1 FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -431,41 +645,99 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cbigint (type: bigint) outputColumnNames: cbigint + Select Vectorization: + 
className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(cbigint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -491,7 +763,7 @@ POSTHOOK: type: QUERY 
POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -1698460028409 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cbigint) as c1, variance(cbigint), @@ -504,7 +776,7 @@ SELECT FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cbigint) as c1, variance(cbigint), @@ -517,6 +789,10 @@ SELECT FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -548,7 +824,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) @@ -562,6 +851,13 @@ STAGE PLANS: value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) @@ -610,7 +906,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -1.8515862077935246E8 2.07689300543081907E18 2.07689300543081907E18 2.07711944383088768E18 1.441142951074188E9 1.441142951074188E9 1.441142951074188E9 1.4412215110214279E9 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(cfloat) as c1, MAX(cfloat), COUNT(cfloat), @@ -618,7 +914,7 @@ SELECT MIN(cfloat) as c1, FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(cfloat) as c1, MAX(cfloat), COUNT(cfloat), @@ -626,6 +922,10 @@ SELECT MIN(cfloat) as c1, FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ 
-643,42 +943,100 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(cfloat), max(cfloat), count(cfloat), count() + Group By Vectorization: + aggregators: VectorUDAFMinDouble(col 4) -> float, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFCount(col 4) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinDouble(col 0) -> float, VectorUDAFMaxDouble(col 1) -> float, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: float) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 
Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: float), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -710,16 +1068,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -64.0 79.553 9173 12288 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(cfloat) as c1 FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(cfloat) as c1 FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -737,41 +1099,99 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(cfloat) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 4) -> double + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + 
groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 0) -> double + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -797,7 +1217,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -39479.635992884636 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cfloat) as c1, variance(cfloat), @@ -810,7 +1230,7 @@ SELECT FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cfloat) as c1, variance(cfloat), @@ -823,6 +1243,10 @@ SELECT FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -854,7 +1278,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + 
vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) @@ -868,6 +1305,13 @@ STAGE PLANS: value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) @@ -917,7 +1361,7 @@ POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -4.303895780321011 1163.8972588604984 1163.8972588604984 1164.0241556397025 34.115938487171924 34.115938487171924 34.115938487171924 34.11779822379666 WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT AVG(cbigint), (-(AVG(cbigint))), (-6432 + AVG(cbigint)), @@ -944,7 +1388,7 @@ WHERE (((cstring2 LIKE '%b%') AND ((cboolean2 = 1) AND (3569 = ctinyint)))) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT AVG(cbigint), (-(AVG(cbigint))), (-6432 + AVG(cbigint)), @@ -971,6 +1415,10 @@ WHERE (((cstring2 LIKE '%b%') AND ((cboolean2 = 1) AND (3569 = ctinyint)))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -987,15 +1435,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 12)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean, FilterDoubleColLessDoubleColumn(col 13, col 5)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0, col 1)(children: col 0) -> boolean, FilterLongColEqualLongScalar(col 11, val 1) -> boolean, FilterLongScalarEqualLongColumn(val 3569, col 0)(children: col 0) -> boolean) -> boolean) -> boolean predicate: ((cstring2 like '%b%') or (79.553 <> CAST( cint AS decimal(13,3))) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (3569 = UDFToInteger(ctinyint)))) (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint) outputColumnNames: cbigint, cfloat, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumns: [3, 4, 0] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(col 3) -> struct, VectorUDAFStdPopLong(col 3) -> struct, VectorUDAFVarSampLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFMinLong(col 0) -> tinyint + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE @@ -1004,7 +1470,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5) @@ -29908,23 +30387,108 @@ POSTHOOK: query: explain extended select * from alltypesorc where (cint=45 and cfloat=3.02) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean) + Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: 
false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types tinyint:smallint:int:bigint:float:double:string:string:timestamp:timestamp:boolean:boolean + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypesorc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cbigint":"true","cboolean1":"true","cboolean2":"true","cdouble":"true","cfloat":"true","cint":"true","csmallint":"true","cstring1":"true","cstring2":"true","ctimestamp1":"true","ctimestamp2":"true","ctinyint":"true"}} + bucket_count -1 + column.name.delimiter , + columns ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1,cstring2,ctimestamp1,ctimestamp2,cboolean1,cboolean2 + columns.comments + columns.types tinyint:smallint:int:bigint:float:double:string:string:timestamp:timestamp:boolean:boolean +#### A masked pattern was here #### + name default.alltypesorc + numFiles 1 + numRows 12288 + rawDataSize 0 + serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 377237 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cbigint":"true","cboolean1":"true","cboolean2":"true","cdouble":"true","cfloat":"true","cint":"true","csmallint":"true","cstring1":"true","cstring2":"true","ctimestamp1":"true","ctimestamp2":"true","ctinyint":"true"}} + bucket_count -1 + column.name.delimiter , + columns ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1,cstring2,ctimestamp1,ctimestamp2,cboolean1,cboolean2 + columns.comments + columns.types tinyint:smallint:int:bigint:float:double:string:string:timestamp:timestamp:boolean:boolean +#### A masked pattern was here #### + name default.alltypesorc + numFiles 1 + numRows 12288 + rawDataSize 0 + serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 377237 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypesorc + name: default.alltypesorc + Truncated Path -> Alias: + /alltypesorc [alltypesorc] + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypesorc - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean) - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - ListSink + ListSink PREHOOK: query: explain extended select * from alltypesorc where (cint=49 and cfloat=3.5) or @@ -29937,23 +30501,108 @@ POSTHOOK: query: explain extended select * from alltypesorc where (cint=45 and cfloat=3.02) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean) + Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types tinyint:smallint:int:bigint:float:double:string:string:timestamp:timestamp:boolean:boolean + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypesorc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cbigint":"true","cboolean1":"true","cboolean2":"true","cdouble":"true","cfloat":"true","cint":"true","csmallint":"true","cstring1":"true","cstring2":"true","ctimestamp1":"true","ctimestamp2":"true","ctinyint":"true"}} + bucket_count -1 + column.name.delimiter , + columns ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1,cstring2,ctimestamp1,ctimestamp2,cboolean1,cboolean2 + columns.comments + columns.types tinyint:smallint:int:bigint:float:double:string:string:timestamp:timestamp:boolean:boolean +#### A masked pattern was here #### + name default.alltypesorc + numFiles 1 + numRows 12288 + rawDataSize 0 + serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 377237 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cbigint":"true","cboolean1":"true","cboolean2":"true","cdouble":"true","cfloat":"true","cint":"true","csmallint":"true","cstring1":"true","cstring2":"true","ctimestamp1":"true","ctimestamp2":"true","ctinyint":"true"}} + bucket_count -1 + column.name.delimiter , + columns ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1,cstring2,ctimestamp1,ctimestamp2,cboolean1,cboolean2 + columns.comments + columns.types tinyint:smallint:int:bigint:float:double:string:string:timestamp:timestamp:boolean:boolean +#### A masked pattern was here #### + name default.alltypesorc + numFiles 1 + numRows 12288 + rawDataSize 0 + serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 377237 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypesorc + name: default.alltypesorc + Truncated Path -> Alias: + /alltypesorc [alltypesorc] + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypesorc - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean) - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - ListSink + ListSink PREHOOK: query: explain extended select * from alltypesorc where (cint=49 or cfloat=3.5) and @@ -29966,23 +30615,108 @@ POSTHOOK: query: explain extended select * from alltypesorc where (cint=45 or cfloat=3.02) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - 
Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (((cint = 49) or (cfloat = 3.5)) and ((cint = 47) or (cfloat = 2.09)) and ((cint = 45) or (cfloat = 3.02))) (type: boolean) + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types tinyint:smallint:int:bigint:float:double:string:string:timestamp:timestamp:boolean:boolean + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypesorc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cbigint":"true","cboolean1":"true","cboolean2":"true","cdouble":"true","cfloat":"true","cint":"true","csmallint":"true","cstring1":"true","cstring2":"true","ctimestamp1":"true","ctimestamp2":"true","ctinyint":"true"}} + bucket_count -1 + column.name.delimiter , + columns ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1,cstring2,ctimestamp1,ctimestamp2,cboolean1,cboolean2 + columns.comments + columns.types tinyint:smallint:int:bigint:float:double:string:string:timestamp:timestamp:boolean:boolean +#### A masked pattern was here #### + name default.alltypesorc + numFiles 1 + numRows 12288 + rawDataSize 0 + serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 377237 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cbigint":"true","cboolean1":"true","cboolean2":"true","cdouble":"true","cfloat":"true","cint":"true","csmallint":"true","cstring1":"true","cstring2":"true","ctimestamp1":"true","ctimestamp2":"true","ctinyint":"true"}} + bucket_count -1 + column.name.delimiter , + columns ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1,cstring2,ctimestamp1,ctimestamp2,cboolean1,cboolean2 + columns.comments + columns.types tinyint:smallint:int:bigint:float:double:string:string:timestamp:timestamp:boolean:boolean +#### A masked pattern was here #### + name default.alltypesorc + numFiles 1 + numRows 12288 + rawDataSize 0 + serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 377237 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypesorc + name: default.alltypesorc + Truncated Path -> Alias: + /alltypesorc [alltypesorc] + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypesorc - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (((cint = 49) or (cfloat = 3.5)) and ((cint = 47) or (cfloat = 2.09)) and ((cint = 45) or (cfloat = 3.02))) (type: boolean) - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - ListSink + ListSink PREHOOK: query: explain extended select count(*),cstring1 from alltypesorc where cstring1='biology' or cstring1='history' diff --git ql/src/test/results/clientpositive/spark/vectorization_13.q.out ql/src/test/results/clientpositive/spark/vectorization_13.q.out index 1a30288..a6ef031 100644 --- ql/src/test/results/clientpositive/spark/vectorization_13.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_13.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, ctinyint, ctimestamp1, @@ -31,7 +31,7 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, ctinyint, ctimestamp1, @@ -64,6 +64,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -81,15 +85,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 
377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 11.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 12.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > 11.0) and (UDFToDouble(ctimestamp2) <> 12.0) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 0, 8, 4, 6] Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFMinLong(col 0) -> tinyint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 10, col 0, col 8, col 4, col 6 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -101,7 +124,20 @@ STAGE PLANS: Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_pop 
parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) @@ -120,16 +156,33 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20] Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 40 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -250,7 +303,7 @@ NULL -63 1969-12-31 16:00:15.436 -63.0 NULL 63 -63 0 -63.0 -0.0 63.0 -5011.839 0 NULL -64 1969-12-31 16:00:11.912 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64 NULL -64 1969-12-31 16:00:12.339 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64 NULL -64 1969-12-31 16:00:13.274 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, ctinyint, ctimestamp1, @@ -283,7 +336,7 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, ctinyint, ctimestamp1, @@ -316,6 +369,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 POSTHOOK: type: QUERY 
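The notVectorizedReason recorded for Reducer 2 is the interesting part of this hunk: the map side ships its stddev_pop/var_samp partials as struct-typed values (the struct-typed _col7/_col8 value expressions above), and the reduce-side vectorizer only accepts primitive value columns, so the reducer falls back to row mode. Below is a minimal sketch of what such a partial carries and of the two-way merge the reducer performs, using the standard parallel-variance formulation rather than Hive's actual GenericUDAFVariance internals; the class and method names are hypothetical.

public final class VariancePartial {
    long count;   // rows aggregated so far
    double sum;   // running sum of the values
    double m2;    // running sum of squared deviations from the mean

    // Fold one value into the partial (Welford-style update).
    void add(double x) {
        count++;
        sum += x;
        if (count > 1) {
            double t = count * x - sum;   // equals count * (x - mean)
            m2 += t * t / ((double) count * (count - 1));
        }
    }

    // Combine two partials; this is the work the reduce-side GROUP BY
    // does with the struct values produced by the vectorized map side.
    static VariancePartial merge(VariancePartial a, VariancePartial b) {
        VariancePartial out = new VariancePartial();
        out.count = a.count + b.count;
        out.sum = a.sum + b.sum;
        if (a.count == 0 || b.count == 0) {
            out.m2 = a.m2 + b.m2;
        } else {
            double delta = b.sum / b.count - a.sum / a.count;
            out.m2 = a.m2 + b.m2 + delta * delta * a.count * b.count / out.count;
        }
        return out;
    }

    double stddevPop() {
        return count == 0 ? Double.NaN : Math.sqrt(m2 / count);
    }
}

The merge term delta^2 * n_a * n_b / (n_a + n_b) is the usual pairwise variance combination; the point of the sketch is that the intermediate is a three-field struct, which is exactly the shape the vectorized reducer rejects here.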
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -333,15 +390,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val -1.388)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val -1.3359999999999999)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -1.388) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 0, 8, 4, 6] Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFMinLong(col 0) -> tinyint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 10, col 0, col 8, col 4, col 6 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -353,7 +429,20 @@ STAGE PLANS: Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) @@ -372,16 +461,33 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20] Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 40 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vectorization_14.q.out ql/src/test/results/clientpositive/spark/vectorization_14.q.out index f1fca9f..1541908 100644 --- ql/src/test/results/clientpositive/spark/vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_14.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT ctimestamp1, cfloat, cstring1, @@ -31,7 +31,7 @@ WHERE (((ctinyint <= cbigint) GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble ORDER BY cstring1, cfloat, cdouble, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT ctimestamp1, cfloat, cstring1, @@ -64,6 +64,10 @@ WHERE (((ctinyint <= cbigint) GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble ORDER BY cstring1, cfloat, cdouble, ctimestamp1 
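Every file in this patch follows the same pattern: the .q files now issue EXPLAIN VECTORIZATION (optionally with the EXPRESSION detail level), and the golden outputs gain PLAN VECTORIZATION, Map Vectorization, and Reduce Vectorization sections. For inspecting such a plan outside the qtest harness, a plain JDBC client is enough; the HiveServer2 URL and the query below are placeholders, not part of this patch.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class ExplainVectorization {
    public static void main(String[] args) throws Exception {
        // Assumes the Hive JDBC driver is on the classpath and a
        // HiveServer2 instance is reachable at this (placeholder) URL.
        try (Connection conn = DriverManager.getConnection(
                 "jdbc:hive2://localhost:10000/default");
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(
                 "EXPLAIN VECTORIZATION SELECT count(*) FROM alltypesorc")) {
            // Each result row is one line of the plan text, including the
            // PLAN VECTORIZATION / Map Vectorization / Reduce Vectorization
            // blocks shown in these golden files (modulo masked patterns).
            while (rs.next()) {
                System.out.println(rs.getString(1));
            }
        }
    }
}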
POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -101,7 +105,20 @@ STAGE PLANS: Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5) @@ -120,6 +137,13 @@ STAGE PLANS: value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey3 (type: timestamp), KEY.reducesinkkey1 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: float), VALUE._col6 (type: float), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double), VALUE._col16 (type: double), VALUE._col17 (type: double) diff --git ql/src/test/results/clientpositive/spark/vectorization_15.q.out ql/src/test/results/clientpositive/spark/vectorization_15.q.out index 42d888f..1d925c5 100644 --- ql/src/test/results/clientpositive/spark/vectorization_15.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_15.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cfloat, cboolean1, cdouble, @@ -29,7 +29,7 @@ WHERE (((cstring2 LIKE '%ss%') GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cfloat, cboolean1, cdouble, @@ -60,6 +60,10 @@ WHERE (((cstring2 
LIKE '%ss%') GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -97,7 +101,20 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), min(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), stddev_pop(VALUE._col5) @@ -116,6 +133,13 @@ STAGE PLANS: value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) diff --git ql/src/test/results/clientpositive/spark/vectorization_16.q.out ql/src/test/results/clientpositive/spark/vectorization_16.q.out index e6fca7d..e731c2d 100644 --- ql/src/test/results/clientpositive/spark/vectorization_16.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_16.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -18,7 +18,7 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -38,6 +38,10 @@ 
WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -74,7 +78,20 @@ STAGE PLANS: Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) diff --git ql/src/test/results/clientpositive/spark/vectorization_17.q.out ql/src/test/results/clientpositive/spark/vectorization_17.q.out index df117b4..a8f401b 100644 --- ql/src/test/results/clientpositive/spark/vectorization_17.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_17.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cfloat, cstring1, cint, @@ -22,7 +22,7 @@ WHERE (((cbigint > -23) OR (cfloat = cdouble)))) ORDER BY cbigint, cfloat PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cfloat, cstring1, cint, @@ -46,6 +46,10 @@ WHERE (((cbigint > -23) OR (cfloat = cdouble)))) ORDER BY cbigint, cfloat POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -75,8 +79,23 @@ STAGE PLANS: Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: 
double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(11,4)), VALUE._col11 (type: double) diff --git ql/src/test/results/clientpositive/spark/vectorization_7.q.out ql/src/test/results/clientpositive/spark/vectorization_7.q.out index 11f0083..9783907 100644 --- ql/src/test/results/clientpositive/spark/vectorization_7.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_7.q.out @@ -1,6 +1,4 @@ -PREHOOK: query: -- SORT_QUERY_RESULTS - -EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, cbigint, csmallint, @@ -27,9 +25,7 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 PREHOOK: type: QUERY -POSTHOOK: query: -- SORT_QUERY_RESULTS - -EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, cbigint, csmallint, @@ -56,6 +52,10 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -72,35 +72,78 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator - predicate: (((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean) + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val -15.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean + predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean) Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22] + selectExpressions: LongColAddLongColumn(col 3, col 3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14] Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE File Output 
Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -189,8 +232,7 @@ NULL NULL -7196 -61 1969-12-31 15:59:44.823 NULL NULL 0 7196 61 78 NULL NULL 61 NULL NULL -7196 1 1969-12-31 15:59:48.361 NULL NULL 0 7196 -1 16 NULL NULL -1 0 NULL NULL -7196 14 1969-12-31 15:59:50.291 NULL NULL 0 7196 -14 3 NULL NULL -14 0 NULL NULL -7196 22 1969-12-31 15:59:52.699 NULL NULL 0 7196 -22 -5 NULL NULL -22 0 -PREHOOK: query: -- double compare timestamp -EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, cbigint, csmallint, @@ -217,8 +259,7 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 PREHOOK: type: QUERY -POSTHOOK: query: -- double compare timestamp -EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, cbigint, csmallint, @@ -245,6 +286,10 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -261,35 +306,78 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator - predicate: (((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 7.6850000000000005)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean + predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), 
csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22] + selectExpressions: LongColAddLongColumn(col 3, col 3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) 
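The selectExpressions annotations printed here encode scratch-column dataflow: an entry such as LongColUnaryMinus(col 0) -> 16:long says input column 0 is negated into scratch column 16, and the projection then refers to columns purely by index (an index may repeat, as with reducesinkkey9 feeding both _col9 and _col13 above). The following is a schematic of the per-batch kernel behind one such entry, with plain arrays standing in for Hive's LongColumnVector and no claim to match the generated class.

// Negate input column values into a scratch column, honoring the batch's
// selected-row list when a preceding filter has narrowed the batch.
static void longColUnaryMinus(long[] in, long[] scratch,
                              int[] selected, int size, boolean selectedInUse) {
    if (selectedInUse) {
        // Sparse batch: 'selected' holds the indices of surviving rows.
        for (int j = 0; j < size; j++) {
            int i = selected[j];
            scratch[i] = -in[i];
        }
    } else {
        // Dense batch: rows 0..size-1 are all live.
        for (int i = 0; i < size; i++) {
            scratch[i] = -in[i];
        }
    }
}

The selected/selectedInUse split is why a VectorFilterOperator composes cheaply with downstream expressions: the filter shrinks the selected list, and later kernels touch only the surviving rows.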
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14] Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vectorization_8.q.out ql/src/test/results/clientpositive/spark/vectorization_8.q.out index 1d4f32b..b5c056f 100644 --- ql/src/test/results/clientpositive/spark/vectorization_8.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_8.q.out @@ -1,6 +1,4 @@ -PREHOOK: query: -- SORT_QUERY_RESULTS - -EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, @@ -25,9 +23,7 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: -- SORT_QUERY_RESULTS - -EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, @@ -52,6 +48,10 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -68,35 +68,78 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator - predicate: ((cstring2 is not null and ((UDFToDouble(ctimestamp1) <= 10.0) and (UDFToDouble(ctimestamp2) <> 16.0))) or ((cfloat < -6432.0) or (cboolean1 is not null and (cdouble = 988888.0)))) (type: boolean) - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 12, val 10.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 16.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -6432.0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, FilterDoubleColEqualDoubleScalar(col 5, val 988888.0) -> boolean) -> boolean) -> boolean + predicate: ((cstring2 is not null and 
(UDFToDouble(ctimestamp1) <= 10.0) and (UDFToDouble(ctimestamp2) <> 16.0)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean) + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) + expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 5, 10, 6, 4, 12, 13, 14, 16, 18, 15, 17, 19, 21] + selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -257.0) -> 14:double, DoubleColAddDoubleColumn(col 15, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 15:double) -> 16:double, DoubleColAddDoubleColumn(col 15, col 17)(children: DoubleColUnaryMinus(col 5) -> 15:double, CastLongToDouble(col 3) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 5) -> 15:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4) -> 17:double, DoubleColUnaryMinus(col 4) -> 19:double, DoubleColAddDoubleColumn(col 20, col 22)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 20:double, col 22) -> 21:double + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: double) + key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, 
No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: double), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13] + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -176,8 +219,7 @@ POSTHOOK: Input: default@alltypesorc 1969-12-31 15:59:43.783 -200.0 NULL NULL -11.0 200.0 -5438.15 51400.0 NULL 1.2116287E7 200.0 9.611 11.0 NULL 1969-12-31 15:59:43.807 -7196.0 NULL NULL 42.0 7196.0 1557.8500000000004 1849372.0 NULL -5.98226333E8 7196.0 -43.389 -42.0 NULL 1969-12-31 15:59:43.82 -7196.0 NULL NULL -30.0 7196.0 1557.8500000000004 
1849372.0 NULL 1.329550715E9 7196.0 28.611 30.0 NULL -PREHOOK: query: -- double compare timestamp -EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, @@ -202,8 +244,7 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: -- double compare timestamp -EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, @@ -228,6 +269,10 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -244,35 +289,78 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator - predicate: ((cstring2 is not null and ((UDFToDouble(ctimestamp1) <= 12.503) and (UDFToDouble(ctimestamp2) <> 11.998))) or ((cfloat < -6432.0) or (cboolean1 is not null and (cdouble = 988888.0)))) (type: boolean) - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 12, val 12.503)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 11.998)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -6432.0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, FilterDoubleColEqualDoubleScalar(col 5, val 988888.0) -> boolean) -> boolean) -> boolean + predicate: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 12.503) and (UDFToDouble(ctimestamp2) <> 11.998)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean) + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) + expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, 
_col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 5, 10, 6, 4, 12, 13, 14, 16, 18, 15, 17, 19, 21] + selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -257.0) -> 14:double, DoubleColAddDoubleColumn(col 15, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 15:double) -> 16:double, DoubleColAddDoubleColumn(col 15, col 17)(children: DoubleColUnaryMinus(col 5) -> 15:double, CastLongToDouble(col 3) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 5) -> 15:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4) -> 17:double, DoubleColUnaryMinus(col 4) -> 19:double, DoubleColAddDoubleColumn(col 20, col 22)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 20:double, col 22) -> 21:double + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: double) + key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: 
double), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13] + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vectorization_9.q.out ql/src/test/results/clientpositive/spark/vectorization_9.q.out index e6fca7d..e731c2d 100644 --- ql/src/test/results/clientpositive/spark/vectorization_9.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_9.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -18,7 +18,7 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -38,6 +38,10 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -74,7 +78,20 @@ STAGE PLANS: Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY 
operator: Data type struct of Column[VALUE._col1] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) diff --git ql/src/test/results/clientpositive/spark/vectorization_decimal_date.q.out ql/src/test/results/clientpositive/spark/vectorization_decimal_date.q.out index 9a6cb52..b609ab0 100644 --- ql/src/test/results/clientpositive/spark/vectorization_decimal_date.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_decimal_date.q.out @@ -12,28 +12,77 @@ POSTHOOK: Lineage: date_decimal_test.cdate EXPRESSION [(alltypesorc)alltypesorc. POSTHOOK: Lineage: date_decimal_test.cdecimal EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: date_decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: date_decimal_test.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: date_decimal_test + Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean + predicate: (cint is not null and cdouble is not null) (type: boolean) + Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cdate (type: date), cdecimal (type: decimal(20,10)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3] + Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + 
enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: date_decimal_test - Filter Operator - predicate: (cint is not null and cdouble is not null) (type: boolean) - Select Operator - expressions: cdate (type: date), cdecimal (type: decimal(20,10)) - outputColumnNames: _col0, _col1 - Limit - Number of rows: 10 - ListSink + ListSink PREHOOK: query: SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/vectorization_div0.q.out ql/src/test/results/clientpositive/spark/vectorization_div0.q.out index 9bbe22c..baea88f 100644 --- ql/src/test/results/clientpositive/spark/vectorization_div0.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_div0.q.out @@ -1,25 +1,70 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select cdouble / 0.0 from alltypesorc limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select cdouble / 0.0 from alltypesorc limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Select Operator + expressions: (cdouble / 0.0) (type: double) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12] + selectExpressions: DoubleColDivideDoubleScalar(col 5, val 0.0) -> 12:double + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 100 Processor Tree: - TableScan - alias: alltypesorc - Select Operator - expressions: (cdouble / 0.0) (type: double) - outputColumnNames: _col0 - Limit - Number of rows: 100 - ListSink + ListSink PREHOOK: query: select cdouble / 0.0 from alltypesorc limit 100 PREHOOK: type: QUERY @@ -129,14 +174,18 @@ NULL NULL 
NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -153,32 +202,74 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3, val 0) -> boolean, FilterLongColLessLongScalar(col 3, val 100000000) -> boolean) -> boolean predicate: ((cbigint > 0) and (cbigint < 100000000)) (type: boolean) Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21)) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 15, 17] + selectExpressions: LongColSubtractLongScalar(col 3, val 988888) -> 12:long, DoubleColDivideDoubleColumn(col 5, col 14)(children: CastLongToDouble(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 14:double) -> 15:double, DecimalScalarDivideDecimalColumn(val 1.2, col 16)(children: CastLongToDecimal(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 16:decimal(19,0)) -> 17:decimal(22,21) Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(22,21)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + 
allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: decimal(22,21)) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -301,14 +392,18 @@ POSTHOOK: Input: default@alltypesorc 59347745 NULL 0.000000020219807846111 60229567 NULL 0.000000019923769334088 60330397 NULL 0.000000019890470801974 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -325,32 +420,74 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -500.0) -> boolean, FilterDoubleColLessDoubleScalar(col 5, val -199.0) -> boolean) -> boolean predicate: ((cdouble >= -500.0) and (cdouble < -199.0)) (type: boolean) Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double) outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 15, 16, 14, 17] + selectExpressions: DoubleColAddDoubleScalar(col 5, val 200.0) -> 12:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: CastLongToDouble(col 3) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 15:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 16:double, 
DoubleScalarDivideDoubleColumn(val 3.0, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 14:double, DoubleScalarDivideDoubleColumn(val 1.2, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 17:double Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 1, 3, 4] Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out index 23d7ab3..361384f 100644 --- ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out @@ -46,10 +46,14 @@ POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cstring2 SIMPLE [(alltype POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, 
comment:null), ] -PREHOOK: query: explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 +PREHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 +POSTHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -76,8 +80,23 @@ STAGE PLANS: Statistics: Num rows: 200 Data size: 54496 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double) diff --git ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out index 59a3be0..1f1bb30 100644 --- ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out @@ -1,8 +1,12 @@ WARNING: Comparing a bigint and a double may result in a loss of precision. 
-PREHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble +PREHOOK: query: explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble +POSTHOOK: query: explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -37,7 +41,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0) diff --git ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out index 4d8f87b..47664ce 100644 --- ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: EXPLAIN SELECT AVG(cint), +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT AVG(cint), (AVG(cint) + -3728), (-((AVG(cint) + -3728))), (-((-((AVG(cint) + -3728))))), @@ -34,7 +35,8 @@ WHERE ((762 = cbigint) AND ((79.553 != cint) AND (cboolean2 != cboolean1))))) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT AVG(cint), +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT AVG(cint), (AVG(cint) + -3728), (-((AVG(cint) + -3728))), (-((-((AVG(cint) + -3728))))), @@ -70,6 +72,10 @@ WHERE ((762 = cbigint) AND ((79.553 != cint) AND (cboolean2 != cboolean1))))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -86,15 +92,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val 762, col 3) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 1) -> 12:double) -> boolean, FilterDoubleColGreaterDoubleScalar(col 12, val -5.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 2) -> 12:double) -> boolean) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val a) -> boolean, 
FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 13, val -1.389)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean, FilterStringGroupColNotEqualStringScalar(col 7, val a) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 14)(children: CastLongToDecimal(col 2) -> 14:decimal(13,3)) -> boolean, FilterLongColNotEqualLongColumn(col 11, col 10) -> boolean) -> boolean) -> boolean predicate: ((762 = cbigint) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint))) or (cstring1 = 'a') or ((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and (cboolean2 <> cboolean1))) (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint) outputColumnNames: cint, cdouble, csmallint, cfloat, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 5, 1, 4, 0] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(col 2) -> struct, VectorUDAFSumDouble(col 5) -> double, VectorUDAFStdPopLong(col 2) -> struct, VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFAvgDouble(col 4) -> struct, VectorUDAFStdSampLong(col 2) -> struct, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFCount(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE @@ -103,7 +127,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN 
[tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8) @@ -205,7 +242,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 1.6000018929276082E8 1.5999646129276082E8 -1.5999646129276082E8 1.5999646129276082E8 2.5598867626205912E16 -8706342.964000002 -1.6000018929276082E8 5.481251832900256E8 4.095728233294762E24 8549.657499338187 -5.481251832900256E8 3.8812872199726474E8 2.12743126884874112E17 3.0054786945575034E17 -5.700752675298234 -3.0054786945575034E17 3.0054786945575034E17 973579.3664121237 5.48222463472403E8 -973579.3664121237 -18.377427808018613 -64 2044 -6.573680812059066E-5 18.377427808018613 -PREHOOK: query: EXPLAIN SELECT MAX(cint), +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT MAX(cint), (MAX(cint) / -3728), (MAX(cint) * -3728), VAR_POP(cbigint), @@ -238,7 +276,8 @@ WHERE (((cbigint <= 197) OR ((cfloat > 79.553) AND (cstring2 LIKE '10%'))) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT MAX(cint), +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT MAX(cint), (MAX(cint) / -3728), (MAX(cint) * -3728), VAR_POP(cbigint), @@ -271,6 +310,10 @@ WHERE (((cbigint <= 197) OR ((cfloat > 79.553) AND (cstring2 LIKE '10%'))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -287,15 +330,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 3, val 197) -> boolean, FilterLongColLessLongColumn(col 2, col 3)(children: col 2) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -26.28) -> boolean, FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 1) -> 12:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 0) -> 12:double) -> boolean, FilterStringColRegExpStringScalar(col 6, pattern .*ss.*) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 4, val 79.5530014038086) -> boolean, FilterStringColLikeStringScalar(col 7, pattern 10%) -> boolean) -> boolean) -> boolean predicate: (((cbigint <= 197) and (UDFToLong(cint) < cbigint)) or ((cdouble >= -26.28) and (UDFToDouble(csmallint) > cdouble)) or ((UDFToFloat(ctinyint) > cfloat) and cstring1 regexp '.*ss.*') or ((cfloat > 79.553) and (cstring2 like '10%'))) (type: boolean) Statistics: Num rows: 6826 Data size: 209555 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cbigint (type: bigint), csmallint (type: smallint), cdouble (type: double), ctinyint (type: tinyint) outputColumnNames: cint, cbigint, csmallint, 
cdouble, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 1, 5, 0] Statistics: Num rows: 6826 Data size: 209555 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(cint), var_pop(cbigint), stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), stddev_samp(csmallint), var_samp(cint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 2) -> int, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFMaxDouble(col 5) -> double, VectorUDAFAvgLong(col 0) -> struct, VectorUDAFMinLong(col 2) -> int, VectorUDAFMinDouble(col 5) -> double, VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFVarSampLong(col 2) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE @@ -304,7 +365,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: int), _col6 (type: double), _col7 (type: struct), _col8 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF var_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), var_pop(VALUE._col1), stddev_pop(VALUE._col2), max(VALUE._col3), avg(VALUE._col4), min(VALUE._col5), min(VALUE._col6), stddev_samp(VALUE._col7), var_samp(VALUE._col8) @@ -400,7 +474,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -20301111 5445.576984978541 -1626869520 7.9684972882908944E16 1626869520 NULL -563 NULL NULL NULL -8.935323383084578 NULL -1069736047 NULL NULL NULL NULL NULL -5445.576984978541 511 5454.512308361625 1626869520 7.2647256545687792E16 -PREHOOK: query: EXPLAIN SELECT VAR_POP(cbigint), +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), (VAR_POP(cbigint) - (-(VAR_POP(cbigint)))), COUNT(*), @@ -432,7 +507,8 @@ WHERE ((ctimestamp1 = ctimestamp2) AND ((ctimestamp2 IS NOT NULL) AND (cstring2 > 'a')))) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT 
VAR_POP(cbigint), +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), (VAR_POP(cbigint) - (-(VAR_POP(cbigint)))), COUNT(*), @@ -464,6 +540,10 @@ WHERE ((ctimestamp1 = ctimestamp2) AND ((ctimestamp2 IS NOT NULL) AND (cstring2 > 'a')))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -480,15 +560,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterTimestampColEqualTimestampColumn(col 8, col 9) -> boolean, FilterDoubleScalarEqualDoubleColumn(val 762.0, col 4) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val ss) -> boolean, FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 1, col 3)(children: col 1) -> boolean, FilterLongScalarEqualLongColumn(val 1, col 11) -> boolean) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, SelectColumnIsNotNull(col 9) -> boolean, FilterStringGroupColGreaterStringScalar(col 7, val a) -> boolean) -> boolean) -> boolean predicate: ((ctimestamp1 = ctimestamp2) or (762 = cfloat) or (cstring1 = 'ss') or ((UDFToLong(csmallint) <= cbigint) and (1 = cboolean2)) or (cboolean1 is not null and ctimestamp2 is not null and (cstring2 > 'a'))) (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cbigint (type: bigint), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cdouble (type: double) outputColumnNames: cbigint, ctinyint, csmallint, cint, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 0, 1, 2, 5] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFMaxLong(col 2) -> int, VectorUDAFStdSampDouble(col 5) -> struct, VectorUDAFCount(col 0) -> bigint, VectorUDAFAvgLong(col 0) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE @@ -497,7 +595,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE value expressions: 
_col0 (type: struct), _col1 (type: bigint), _col2 (type: tinyint), _col3 (type: struct), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF var_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: var_pop(VALUE._col0), count(VALUE._col1), max(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7) @@ -592,7 +703,8 @@ POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 2.5109214708345636E18 -2.5109214708345636E18 5.0218429416691272E18 2780 75.198 62 2.5109214708345661E18 2.5109214708345636E18 -1.0 2780 -2780 9460.675803068349 -2.5109214708345636E18 -2118360 1072872630 -2118298 -2.5109214697616911E18 185935.34910862707 0 758 -1.733509234828496 -3728 WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: EXPLAIN SELECT AVG(ctinyint), +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT AVG(ctinyint), (AVG(ctinyint) + 6981), ((AVG(ctinyint) + 6981) + AVG(ctinyint)), MAX(cbigint), @@ -614,7 +726,8 @@ WHERE (((ctimestamp2 <= ctimestamp1) AND (ctimestamp1 >= 0)) OR (cfloat = 17)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT AVG(ctinyint), +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT AVG(ctinyint), (AVG(ctinyint) + 6981), ((AVG(ctinyint) + 6981) + AVG(ctinyint)), MAX(cbigint), @@ -636,6 +749,10 @@ WHERE (((ctimestamp2 <= ctimestamp1) AND (ctimestamp1 >= 0)) OR (cfloat = 17)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -652,15 +769,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterTimestampColLessEqualTimestampColumn(col 9, col 8) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 3) -> 12:double) -> boolean, FilterStringScalarLessEqualStringGroupColumn(val ss, col 6) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColLessLongColumn(col 1, col 0)(children: col 0) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean) -> boolean, FilterDoubleColEqualDoubleScalar(col 4, val 17.0) -> boolean) -> boolean predicate: (((ctimestamp2 <= ctimestamp1) and (UDFToDouble(cbigint) <> cdouble) and ('ss' <= cstring1)) or ((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0)) or (cfloat = 17)) (type: boolean) Statistics: 
Num rows: 8874 Data size: 272428 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cbigint (type: bigint), cint (type: int), cfloat (type: float) outputColumnNames: ctinyint, cbigint, cint, cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3, 2, 4] Statistics: Num rows: 8874 Data size: 272428 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(col 0) -> struct, VectorUDAFMaxLong(col 3) -> bigint, VectorUDAFStdSampLong(col 2) -> struct, VectorUDAFVarPopLong(col 2) -> struct, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFMaxDouble(col 4) -> float + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE @@ -669,7 +804,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: float) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), max(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_pop(VALUE._col4), max(VALUE._col5) @@ -744,7 +892,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -0.5934409161894847 6980.406559083811 6979.813118167622 2141851355 -11761.597368421053 -6980.406559083811 1.5852855222071937E8 -0.5934409161894847 2.5099887741860852E16 1.52140608502098816E18 -2141851355 -13.510823917813237 79.553 -3.998255191435157E19 -PREHOOK: query: EXPLAIN SELECT cint, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cdouble, ctimestamp2, cstring1, @@ -780,7 +929,8 @@ WHERE (((cstring1 RLIKE 'a.*') ORDER BY cint, cdouble, ctimestamp2, cstring1, cboolean2, ctinyint, cfloat, ctimestamp1, csmallint, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13 LIMIT 50 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cdouble, ctimestamp2, cstring1, @@ -816,6 +966,10 @@ WHERE (((cstring1 
RLIKE 'a.*') ORDER BY cint, cdouble, ctimestamp2, cstring1, cboolean2, ctinyint, cfloat, ctimestamp1, csmallint, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13 LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -832,31 +986,73 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterStringColRegExpStringScalar(col 6, pattern a.*) -> boolean, FilterStringColLikeStringScalar(col 7, pattern %ss%) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val 1, col 11) -> boolean, FilterDecimalColLessDecimalScalar(col 12, val 79.553)(children: CastLongToDecimal(col 1) -> 12:decimal(8,3)) -> boolean, FilterLongScalarNotEqualLongColumn(val -257, col 0)(children: col 0) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 0) -> 13:double) -> boolean, FilterDoubleColGreaterEqualDoubleColumn(col 4, col 13)(children: CastLongToFloatViaLongToDouble(col 2) -> 13:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColLessLongColumn(col 2, col 3)(children: col 2) -> boolean, FilterLongColGreaterLongColumn(col 0, col 3)(children: col 0) -> boolean) -> boolean) -> boolean predicate: ((cstring1 regexp 'a.*' and (cstring2 like '%ss%')) or ((1 <> cboolean2) and (CAST( csmallint AS decimal(8,3)) < 79.553) and (-257 <> UDFToInteger(ctinyint))) or ((cdouble > UDFToDouble(ctinyint)) and (cfloat >= UDFToFloat(cint))) or ((UDFToLong(cint) < cbigint) and (UDFToLong(ctinyint) > cbigint))) (type: boolean) Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cdouble (type: double), ctimestamp2 (type: timestamp), cstring1 (type: string), cboolean2 (type: boolean), ctinyint (type: tinyint), cfloat (type: float), ctimestamp1 (type: timestamp), csmallint (type: smallint), cbigint (type: bigint), (-3728 * cbigint) (type: bigint), (- cint) (type: int), (-863.257 - CAST( cint AS decimal(10,0))) (type: decimal(14,3)), (- csmallint) (type: smallint), (csmallint - (- csmallint)) (type: smallint), ((csmallint - (- csmallint)) + (- csmallint)) (type: smallint), (UDFToDouble(cint) / UDFToDouble(cint)) (type: double), ((-863.257 - CAST( cint AS decimal(10,0))) - -26.28) (type: decimal(15,3)), (- cfloat) (type: float), (cdouble * -89010.0) (type: double), (UDFToDouble(ctinyint) / 988888.0) (type: double), (- ctinyint) (type: tinyint), (79.553 / CAST( ctinyint AS decimal(3,0))) (type: decimal(9,7)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 5, 9, 6, 11, 0, 4, 8, 1, 3, 14, 15, 17, 18, 20, 22, 24, 26, 13, 23, 28, 19, 30] + selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 3) -> 14:long, LongColUnaryMinus(col 2) -> 15:long, 
DecimalScalarSubtractDecimalColumn(val -863.257, col 16)(children: CastLongToDecimal(col 2) -> 16:decimal(10,0)) -> 17:decimal(14,3), LongColUnaryMinus(col 1) -> 18:long, LongColSubtractLongColumn(col 1, col 19)(children: LongColUnaryMinus(col 1) -> 19:long) -> 20:long, LongColAddLongColumn(col 21, col 19)(children: LongColSubtractLongColumn(col 1, col 19)(children: LongColUnaryMinus(col 1) -> 19:long) -> 21:long, LongColUnaryMinus(col 1) -> 19:long) -> 22:long, DoubleColDivideDoubleColumn(col 13, col 23)(children: CastLongToDouble(col 2) -> 13:double, CastLongToDouble(col 2) -> 23:double) -> 24:double, DecimalColSubtractDecimalScalar(col 25, val -26.28)(children: DecimalScalarSubtractDecimalColumn(val -863.257, col 16)(children: CastLongToDecimal(col 2) -> 16:decimal(10,0)) -> 25:decimal(14,3)) -> 26:decimal(15,3), DoubleColUnaryMinus(col 4) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -89010.0) -> 23:double, DoubleColDivideDoubleScalar(col 27, val 988888.0)(children: CastLongToDouble(col 0) -> 27:double) -> 28:double, LongColUnaryMinus(col 0) -> 19:long, DecimalScalarDivideDecimalColumn(val 79.553, col 29)(children: CastLongToDecimal(col 0) -> 29:decimal(3,0)) -> 30:decimal(9,7) Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: timestamp), _col3 (type: string), _col4 (type: boolean), _col5 (type: tinyint), _col6 (type: float), _col7 (type: timestamp), _col8 (type: smallint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: int), _col12 (type: decimal(14,3)), _col13 (type: smallint), _col14 (type: smallint), _col15 (type: smallint), _col16 (type: double), _col17 (type: decimal(15,3)), _col18 (type: float), _col19 (type: double), _col20 (type: double), _col21 (type: tinyint), _col22 (type: decimal(9,7)) sort order: +++++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: boolean), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: float), KEY.reducesinkkey7 (type: timestamp), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: bigint), KEY.reducesinkkey11 (type: int), 
KEY.reducesinkkey12 (type: decimal(14,3)), KEY.reducesinkkey13 (type: smallint), KEY.reducesinkkey14 (type: smallint), KEY.reducesinkkey15 (type: smallint), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: decimal(15,3)), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: tinyint), KEY.reducesinkkey22 (type: decimal(9,7)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 50 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -995,7 +1191,8 @@ NULL -7196.0 1969-12-31 15:59:58.174 NULL false -64 -64.0 1969-12-31 15:59:56.04 NULL -7196.0 1969-12-31 15:59:58.174 NULL false -64 -64.0 1969-12-31 16:00:01.785 -7196 -1639157869 6110780535632 NULL NULL 7196 -14392 -7196 NULL NULL 64.0 6.4051596E8 -6.471915929812072E-5 64 -1.2430156 NULL -7196.0 1969-12-31 15:59:58.174 NULL false -64 -64.0 1969-12-31 16:00:11.912 -7196 -1615920595 6024151978160 NULL NULL 7196 -14392 -7196 NULL NULL 64.0 6.4051596E8 -6.471915929812072E-5 64 -1.2430156 NULL -7196.0 1969-12-31 15:59:58.174 NULL false -64 -64.0 1969-12-31 16:00:12.339 -7196 1805860756 -6732248898368 NULL NULL 7196 -14392 -7196 NULL NULL 64.0 6.4051596E8 -6.471915929812072E-5 64 -1.2430156 -PREHOOK: query: EXPLAIN SELECT cint, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cbigint, cstring1, cboolean1, @@ -1030,7 +1227,8 @@ WHERE (((197 > ctinyint) ORDER BY cint, cbigint, cstring1, cboolean1, cfloat, cdouble, ctimestamp2, csmallint, cstring2, cboolean2, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15 LIMIT 25 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cbigint, cstring1, cboolean1, @@ -1065,6 +1263,10 @@ WHERE (((197 > ctinyint) ORDER BY cint, cbigint, cstring1, cboolean1, cfloat, cdouble, ctimestamp2, csmallint, cstring2, cboolean2, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1081,31 +1283,73 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongScalarGreaterLongColumn(val 197, col 0)(children: col 0) -> boolean, FilterLongColEqualLongColumn(col 2, col 
3)(children: col 2) -> boolean) -> boolean, FilterLongColEqualLongScalar(col 3, val 359) -> boolean, FilterLongColLessLongScalar(col 10, val 0) -> boolean, FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6, pattern %ss) -> boolean, FilterDoubleColLessEqualDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 0) -> 12:double) -> boolean) -> boolean) -> boolean predicate: (((197 > UDFToInteger(ctinyint)) and (UDFToLong(cint) = cbigint)) or (cbigint = 359) or (cboolean1 < 0) or ((cstring1 like '%ss') and (cfloat <= UDFToFloat(ctinyint)))) (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean), (UDFToDouble(cint) / UDFToDouble(cbigint)) (type: double), (CAST( cbigint AS decimal(19,0)) % 79.553) (type: decimal(5,3)), (- (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (10.175 % cfloat) (type: float), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: float), ((cfloat - (- cfloat)) % -6432.0) (type: float), (cdouble * UDFToDouble(csmallint)) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), (UDFToDouble(cfloat) - (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (- csmallint) (type: smallint), (3569 % cbigint) (type: bigint), (359.0 - cdouble) (type: double), (- csmallint) (type: smallint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 6, 10, 4, 5, 9, 1, 7, 11, 14, 16, 12, 13, 17, 19, 18, 21, 20, 22, 23, 26, 27, 24, 28] + selectExpressions: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 14:double, DecimalColModuloDecimalScalar(col 15, val 79.553)(children: CastLongToDecimal(col 3) -> 15:decimal(19,0)) -> 16:decimal(5,3), DoubleColUnaryMinus(col 17)(children: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 17:double) -> 12:double, DoubleScalarModuloDoubleColumn(val 10.175000190734863, col 4) -> 13:double, DoubleColUnaryMinus(col 4) -> 17:double, DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 19:double, DoubleColModuloDoubleScalar(col 20, val -6432.0)(children: DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 20:double) -> 18:double, DoubleColMultiplyDoubleColumn(col 5, col 20)(children: CastLongToDouble(col 1) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5) -> 20:double, LongColUnaryMinus(col 3) -> 22:long, DoubleColSubtractDoubleColumn(col 4, col 25)(children: col 4, DoubleColDivideDoubleColumn(col 23, col 24)(children: CastLongToDouble(col 2) -> 23:double, CastLongToDouble(col 3) -> 24:double) -> 25:double) -> 23:double, LongColUnaryMinus(col 1) -> 26:long, LongScalarModuloLongColumn(val 3569, col 3) -> 27:long, DoubleScalarSubtractDoubleColumn(val 359.0, col 5) -> 24:double, LongColUnaryMinus(col 1) -> 28:long Statistics: Num rows: 12288 Data size: 377237 
Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean), _col4 (type: float), _col5 (type: double), _col6 (type: timestamp), _col7 (type: smallint), _col8 (type: string), _col9 (type: boolean), _col10 (type: double), _col11 (type: decimal(5,3)), _col12 (type: double), _col13 (type: float), _col14 (type: float), _col15 (type: float), _col16 (type: float), _col17 (type: double), _col18 (type: double), _col19 (type: bigint), _col20 (type: double), _col21 (type: smallint), _col22 (type: bigint), _col23 (type: double), _col24 (type: smallint) sort order: +++++++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: boolean), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: timestamp), KEY.reducesinkkey7 (type: smallint), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: boolean), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: decimal(5,3)), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey13 (type: float), KEY.reducesinkkey14 (type: float), KEY.reducesinkkey15 (type: float), KEY.reducesinkkey16 (type: float), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: bigint), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: smallint), KEY.reducesinkkey22 (type: bigint), KEY.reducesinkkey23 (type: double), KEY.reducesinkkey21 (type: smallint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 21] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: 
VectorFileSinkOperator + native: false Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1195,7 +1439,8 @@ POSTHOOK: Input: default@alltypesorc -462839731 988888 ss false -51.0 NULL NULL NULL Lml5J2QBU77 false -468.04059812638036 44.210 468.04059812638036 10.175 51.0 -102.0 -102.0 NULL NULL -988888 417.04059812638036 NULL 3569 NULL NULL -635141101 -89010 ss false -51.0 NULL NULL NULL rVWAj4N1MCg8Scyp7wj2C true 7135.6151106617235 -69.746 -7135.6151106617235 10.175 51.0 -102.0 -102.0 NULL NULL 89010 -7186.6151106617235 NULL 3569 NULL NULL WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: EXPLAIN SELECT cint, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cstring1, cboolean2, ctimestamp2, @@ -1229,7 +1474,8 @@ WHERE (((csmallint > -26.28) ORDER BY cboolean1, cstring1, ctimestamp2, cfloat, cbigint, cstring1, cdouble, cint, csmallint, cdouble, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13 LIMIT 75 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cstring1, cboolean2, ctimestamp2, @@ -1263,6 +1509,10 @@ WHERE (((csmallint > -26.28) ORDER BY cboolean1, cstring1, ctimestamp2, cfloat, cbigint, cstring1, cdouble, cint, csmallint, cdouble, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13 LIMIT 75 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1279,32 +1529,74 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDecimalColGreaterDecimalScalar(col 12, val -26.28)(children: CastLongToDecimal(col 1) -> 12:decimal(7,2)) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterStringGroupColGreaterEqualStringScalar(col 6, val ss) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 13, col 5)(children: CastLongToDouble(col 2) -> 13:double) -> boolean) -> boolean, FilterLongColEqualLongScalar(col 0, val -89010)(children: col 0) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13, col 4)(children: CastLongToFloatViaLongToDouble(col 3) -> 13:double) -> boolean, FilterDecimalScalarLessEqualDecimalColumn(val -26.28, col 12)(children: CastLongToDecimal(col 1) -> 12:decimal(7,2)) -> boolean) -> boolean) -> boolean predicate: (((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss')) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((UDFToFloat(cbigint) <= cfloat) and (-26.28 <= CAST( csmallint AS decimal(7,2))))) (type: boolean) Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cstring1 (type: string), cboolean2 (type: boolean), ctimestamp2 (type: timestamp), 
cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), cboolean1 (type: boolean), (cint + UDFToInteger(csmallint)) (type: int), (cbigint - UDFToLong(ctinyint)) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - UDFToLong(ctinyint)) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), (UDFToLong((cint + UDFToInteger(csmallint))) * (- cbigint)) (type: bigint), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (-1.389 / CAST( ctinyint AS decimal(3,0))) (type: decimal(8,7)), (UDFToDouble(cbigint) % cdouble) (type: double), (- csmallint) (type: smallint), (UDFToInteger(csmallint) + (cint + UDFToInteger(csmallint))) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 6, 11, 9, 5, 4, 3, 1, 10, 14, 15, 16, 13, 18, 19, 20, 22, 25, 27, 24, 17, 28] + selectExpressions: LongColAddLongColumn(col 2, col 1)(children: col 1) -> 14:long, LongColSubtractLongColumn(col 3, col 0)(children: col 0) -> 15:long, LongColUnaryMinus(col 3) -> 16:long, DoubleColUnaryMinus(col 4) -> 13:double, LongColAddLongColumn(col 17, col 3)(children: LongColSubtractLongColumn(col 3, col 0)(children: col 0) -> 17:long) -> 18:long, DoubleColDivideDoubleColumn(col 5, col 5) -> 19:double, DoubleColUnaryMinus(col 5) -> 20:double, LongColMultiplyLongColumn(col 17, col 21)(children: col 17, LongColUnaryMinus(col 3) -> 21:long) -> 22:long, DoubleColAddDoubleColumn(col 23, col 24)(children: DoubleColUnaryMinus(col 5) -> 23:double, CastLongToDouble(col 3) -> 24:double) -> 25:double, DecimalScalarDivideDecimalColumn(val -1.389, col 26)(children: CastLongToDecimal(col 0) -> 26:decimal(3,0)) -> 27:decimal(8,7), DoubleColModuloDoubleColumn(col 23, col 5)(children: CastLongToDouble(col 3) -> 23:double) -> 24:double, LongColUnaryMinus(col 1) -> 17:long, LongColAddLongColumn(col 1, col 21)(children: col 1, LongColAddLongColumn(col 2, col 1)(children: col 1) -> 21:long) -> 28:long Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col8 (type: boolean), _col1 (type: string), _col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col1 (type: string), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col4 (type: double), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: decimal(8,7)), _col19 (type: double), _col20 (type: smallint), _col21 (type: int) sort order: +++++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: boolean) Execution mode: vectorized + Map Vectorization: + enabled: 
true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey7 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: bigint), KEY.reducesinkkey13 (type: float), KEY.reducesinkkey14 (type: bigint), KEY.reducesinkkey15 (type: double), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: bigint), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: decimal(8,7)), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: smallint), KEY.reducesinkkey22 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [7, 1, 23, 2, 6, 3, 4, 8, 0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 75 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 75 Data size: 2250 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 75 Data size: 2250 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1465,7 +1757,8 @@ NULL NULL true 1969-12-31 15:59:58.456 15601.0 -62.0 667693308 15601 NULL NULL 6 NULL NULL true 1969-12-31 15:59:58.456 15601.0 -63.0 -200542601 15601 NULL NULL -200542538 200542601 63.0 -401085139 1.0 -15601.0 NULL -2.00558202E8 0.0220476 -7347.0 -15601 NULL NULL NULL true 1969-12-31 15:59:58.456 15601.0 -63.0 -721244708 15601 NULL NULL -721244645 721244708 63.0 -1442489353 1.0 -15601.0 NULL -7.21260309E8 0.0220476 -10478.0 -15601 NULL NULL NULL true 1969-12-31 15:59:58.456 15601.0 -64.0 -1809291815 15601 NULL NULL -1809291751 1809291815 64.0 -3618583566 1.0 -15601.0 NULL -1.809307416E9 0.0217031 -12643.0 -15601 NULL -PREHOOK: query: EXPLAIN SELECT ctimestamp1, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT ctimestamp1, cstring2, cdouble, cfloat, @@ -1492,7 +1785,8 @@ WHERE (((-1.389 >= cint) ORDER BY csmallint, cstring2, cdouble, cfloat, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 LIMIT 45 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT ctimestamp1, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT ctimestamp1, cstring2, cdouble, cfloat, @@ -1519,6 +1813,10 @@ WHERE (((-1.389 >= cint) ORDER BY csmallint, cstring2, cdouble, cfloat, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 LIMIT 45 POSTHOOK: type: 
QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1535,32 +1833,74 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDecimalScalarGreaterEqualDecimalColumn(val -1.389, col 12)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean, FilterLongColLessLongColumn(col 1, col 0)(children: col 0) -> boolean, FilterLongScalarGreaterLongColumn(val -6432, col 1)(children: col 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 5, col 4)(children: col 4) -> boolean, FilterStringGroupColLessEqualStringScalar(col 7, val a) -> boolean) -> boolean, FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6, pattern ss%) -> boolean, FilterDecimalScalarGreaterDecimalColumn(val 10.175, col 13)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean) -> boolean) -> boolean predicate: (((-1.389 >= CAST( cint AS decimal(13,3))) and (csmallint < UDFToShort(ctinyint)) and (-6432 > UDFToInteger(csmallint))) or ((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (10.175 > CAST( cbigint AS decimal(22,3))))) (type: boolean) Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp), cstring2 (type: string), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), (UDFToDouble(cbigint) / 3569.0) (type: double), (-257 - UDFToInteger(csmallint)) (type: int), (-6432.0 * cfloat) (type: float), (- cdouble) (type: double), (cdouble * 10.175) (type: double), (UDFToDouble((-6432.0 * cfloat)) / UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), (cint % UDFToInteger(csmallint)) (type: int), (- cdouble) (type: double), (cdouble * (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 7, 5, 4, 3, 1, 15, 16, 14, 17, 18, 20, 19, 21, 22, 24] + selectExpressions: DoubleColDivideDoubleScalar(col 14, val 3569.0)(children: CastLongToDouble(col 3) -> 14:double) -> 15:double, LongScalarSubtractLongColumn(val -257, col 1)(children: col 1) -> 16:long, DoubleScalarMultiplyDoubleColumn(val -6432.0, col 4) -> 14:double, DoubleColUnaryMinus(col 5) -> 17:double, DoubleColMultiplyDoubleScalar(col 5, val 10.175) -> 18:double, DoubleColDivideDoubleColumn(col 19, col 4)(children: col 19, col 4) -> 20:double, DoubleColUnaryMinus(col 4) -> 19:double, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 21:long, DoubleColUnaryMinus(col 5) -> 22:double, DoubleColMultiplyDoubleColumn(col 5, col 23)(children: DoubleColUnaryMinus(col 5) -> 23:double) -> 24:double Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: smallint), _col1 (type: string), _col2 (type: double), _col3 (type: float), _col4 (type: bigint), 
_col6 (type: double), _col7 (type: int), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: int), _col14 (type: double), _col15 (type: double) sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: timestamp) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: timestamp), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: float), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey14 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [15, 1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 10, 11, 12, 8, 14] Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 45 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 45 Data size: 1350 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 45 Data size: 1350 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1676,7 +2016,8 @@ POSTHOOK: Input: default@alltypesorc NULL 4hA4KQj2vD3fI6gX82220d 12329.0 NULL -1887561756 12329 -528876.9279910339 -12586 NULL -12329.0 125447.57500000001 NULL NULL -3104 -12329.0 -1.52004241E8 NULL 4hA4KQj2vD3fI6gX82220d 477.0 NULL -1887561756 477 -528876.9279910339 -734 NULL -477.0 4853.475 NULL NULL -326 -477.0 -227529.0 NULL xH7445Rals48VOulSyR5F 10221.0 NULL -1645852809 10221 -461152.37013168953 -10478 NULL -10221.0 103998.675 NULL NULL 5022 -10221.0 -1.04468841E8 -PREHOOK: query: EXPLAIN SELECT csmallint, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT csmallint, (csmallint % -75) as c1, STDDEV_SAMP(csmallint) as c2, (-1.389 / csmallint) 
as c3, @@ -1696,7 +2037,8 @@ GROUP BY csmallint ORDER BY csmallint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT csmallint, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT csmallint, (csmallint % -75) as c1, STDDEV_SAMP(csmallint) as c2, (-1.389 / csmallint) as c3, @@ -1716,6 +2058,10 @@ GROUP BY csmallint ORDER BY csmallint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1733,15 +2079,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 1, val -257)(children: col 1) -> boolean, FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val -6432, col 1)(children: col 1) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 2) -> 12:double) -> boolean, FilterLongColLessEqualLongColumn(col 0, col 2)(children: col 0) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((UDFToInteger(csmallint) >= -257) and ((-6432 = UDFToInteger(csmallint)) or ((UDFToDouble(cint) >= cdouble) and (UDFToInteger(ctinyint) <= cint)))) (type: boolean) Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), cbigint (type: bigint), ctinyint (type: tinyint) outputColumnNames: csmallint, cbigint, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 3, 0] Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: stddev_samp(csmallint), sum(cbigint), var_pop(ctinyint), count() + Group By Vectorization: + aggregators: VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFVarPopLong(col 0) -> struct, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 1 + native: false + projectedOutputColumns: [0, 1, 2, 3] + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: csmallint (type: smallint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -1753,7 +2118,20 @@ STAGE PLANS: Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: struct), _col4 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, 
spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3) @@ -1772,16 +2150,33 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: decimal(10,9)), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: int), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1859,7 +2254,8 @@ POSTHOOK: Input: default@alltypesorc -89 -14 0.0 0.015606742 NULL NULL 14 0.0 -14 1 89011 -95 -20 0.0 0.014621053 NULL NULL 20 0.0 -20 1 89011 WARNING: Comparing a bigint and a double may result in a loss of precision. 
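[Editor's note on the recurring fallback above: every GROUP BY plan in this file keeps the map side vectorized but reports "notVectorizedReason: Aggregation Function UDF var_pop/stddev_samp/avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported" and "vectorOutputConditionsNotMet: ... output type STRUCT requires PRIMITIVE IS false" on the reduce side. The variance family (VAR_POP, VAR_SAMP, STDDEV_POP, STDDEV_SAMP, and likewise AVG) ships a struct-valued partial result from map to reduce, and the vectorized group-by can only emit primitive-typed columns, so the merge/finalize runs in the row-mode reducer. The following is a minimal, self-contained Java sketch of that partial state and its merge -- illustrative only, not Hive's actual GenericUDAFVariance or VectorUDAF* classes:]

    // Illustrative sketch: the struct-typed partial aggregation state for the
    // variance family, i.e. the (count, sum, variance) tuple the plans above
    // print as a struct-typed value expression between Map and Reducer 2.
    public class VariancePartialSketch {

      /** The partial shipped from map to reduce: (count, sum, variance). */
      static final class Partial {
        long count;      // non-null rows aggregated so far
        double sum;      // running sum of the input values
        double variance; // running sum of squared deviations from the mean
      }

      /** Map side: fold one value into the partial state. */
      static void iterate(Partial p, double v) {
        p.count++;
        p.sum += v;
        if (p.count > 1) {
          double t = p.count * v - p.sum;
          p.variance += (t * t) / ((double) p.count * (p.count - 1));
        }
      }

      /** Reduce side: merge partial b into partial a (pairwise merge that
       *  stays numerically stable for partitions of very different sizes). */
      static void merge(Partial a, Partial b) {
        if (b.count == 0) {
          return;
        }
        if (a.count == 0) {
          a.count = b.count; a.sum = b.sum; a.variance = b.variance;
          return;
        }
        double t = ((double) b.count / a.count) * a.sum - b.sum;
        a.variance += b.variance
            + (((double) a.count / b.count) / (a.count + b.count)) * t * t;
        a.count += b.count;
        a.sum += b.sum;
      }

      /** Finalize: VAR_POP divides by n, VAR_SAMP/STDDEV_SAMP by n - 1.
       *  (Hive returns NULL for empty input; omitted here for brevity.) */
      static double varPop(Partial p)     { return p.variance / p.count; }
      static double stddevSamp(Partial p) { return Math.sqrt(p.variance / (p.count - 1)); }
    }

[This is why the Map Vectorization sections above show "vectorized: true" together with "groupByVectorOutput: false": the hash-mode VectorGroupByOperator can vectorize the iterate step, but it has no primitive column type for the struct partial, so the merge and finalize run in the non-vectorized reducer, exactly as the Reducer 2 annotations record.]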
-PREHOOK: query: EXPLAIN SELECT cdouble, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cdouble, VAR_SAMP(cdouble), (2563.58 * VAR_SAMP(cdouble)), (-(VAR_SAMP(cdouble))), @@ -1886,7 +2282,8 @@ WHERE (((cdouble > 2563.58)) GROUP BY cdouble ORDER BY cdouble PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdouble, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cdouble, VAR_SAMP(cdouble), (2563.58 * VAR_SAMP(cdouble)), (-(VAR_SAMP(cdouble))), @@ -1913,6 +2310,10 @@ WHERE (((cdouble > 2563.58)) GROUP BY cdouble ORDER BY cdouble POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1930,15 +2331,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 2563.58) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 3, col 2)(children: col 2) -> boolean, FilterLongColLessLongColumn(col 1, col 2)(children: col 1) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -5638.14990234375) -> boolean) -> boolean, FilterDecimalScalarEqualDecimalColumn(val 2563.58, col 12)(children: CastLongToDecimal(col 0) -> 12:decimal(6,2)) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterDecimalScalarGreaterDecimalColumn(val -5638.15, col 14)(children: CastLongToDecimal(col 3) -> 14:decimal(21,2)) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((cdouble > 2563.58) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or (2563.58 = CAST( ctinyint AS decimal(6,2))) or ((cdouble <= UDFToDouble(cbigint)) and (-5638.15 > CAST( cbigint AS decimal(21,2)))))) (type: boolean) Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double), cfloat (type: float) outputColumnNames: cdouble, cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 4] Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: var_samp(cdouble), count(cfloat), sum(cfloat), var_pop(cdouble), stddev_pop(cdouble), sum(cdouble) + Group By Vectorization: + aggregators: VectorUDAFVarSampDouble(col 5) -> struct, VectorUDAFCount(col 4) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFVarPopDouble(col 5) -> struct, VectorUDAFStdPopDouble(col 5) -> struct, VectorUDAFSumDouble(col 5) -> double + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 5 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarSampDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false keys: cdouble (type: double) mode: 
hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -1950,7 +2370,20 @@ STAGE PLANS: Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: double), _col4 (type: struct), _col5 (type: struct), _col6 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF var_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: var_samp(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), var_pop(VALUE._col3), stddev_pop(VALUE._col4), sum(VALUE._col5) @@ -1969,13 +2402,27 @@ STAGE PLANS: value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col12 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13] Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2047,7 +2494,8 @@ ORDER BY cdouble POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -PREHOOK: query: EXPLAIN SELECT ctimestamp1, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT ctimestamp1, cstring1, STDDEV_POP(cint) as c1, (STDDEV_POP(cint) * 10.175) as c2, @@ -2102,7 +2550,8 @@ GROUP BY ctimestamp1, cstring1 ORDER BY ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30, c31, c32, 
c33, c34, c35, c36, c37 LIMIT 50 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT ctimestamp1, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT ctimestamp1, cstring1, STDDEV_POP(cint) as c1, (STDDEV_POP(cint) * 10.175) as c2, @@ -2157,6 +2606,10 @@ GROUP BY ctimestamp1, cstring1 ORDER BY ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30, c31, c32, c33, c34, c35, c36, c37 LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2174,15 +2627,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDoubleColNotEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val -257, col 0)(children: col 0) -> boolean, SelectColumnIsNotNull(col 11) -> boolean, FilterStringColRegExpStringScalar(col 6, pattern .*ss) -> boolean, FilterDoubleScalarLessDoubleColumn(val -3.0, col 12)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean) -> boolean, FilterDoubleColEqualDoubleScalar(col 12, val -5.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean) -> boolean, FilterDoubleColEqualDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 2) -> 12:double) -> boolean, FilterExprAndExpr(children: SelectColumnIsNull(col 10) -> boolean, FilterDoubleColLessDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 2) -> 12:double) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((UDFToDouble(ctimestamp1) <> 0.0) and (((-257 <> UDFToInteger(ctinyint)) and cboolean2 is not null and cstring1 regexp '.*ss' and (-3.0 < UDFToDouble(ctimestamp1))) or (UDFToDouble(ctimestamp2) = -5.0) or ((UDFToDouble(ctimestamp1) < 0.0) and (cstring2 like '%b%')) or (cdouble = UDFToDouble(cint)) or (cboolean1 is null and (cfloat < UDFToFloat(cint))))) (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp), cstring1 (type: string), cint (type: int), csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cdouble (type: double) outputColumnNames: ctimestamp1, cstring1, cint, csmallint, ctinyint, cfloat, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 6, 2, 1, 0, 4, 5] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: stddev_pop(cint), avg(csmallint), count(), min(ctinyint), var_samp(csmallint), var_pop(cfloat), avg(cint), var_samp(cfloat), avg(cfloat), min(cdouble), var_pop(csmallint), stddev_pop(ctinyint), sum(cint) + Group By Vectorization: + aggregators: VectorUDAFStdPopLong(col 2) -> 
struct, VectorUDAFAvgLong(col 1) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFVarSampLong(col 1) -> struct, VectorUDAFVarPopDouble(col 4) -> struct, VectorUDAFAvgLong(col 2) -> struct, VectorUDAFVarSampDouble(col 4) -> struct, VectorUDAFAvgDouble(col 4) -> struct, VectorUDAFMinDouble(col 5) -> double, VectorUDAFVarPopLong(col 1) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFSumLong(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 8, col 6 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: ctimestamp1 (type: timestamp), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -2194,7 +2666,20 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: bigint), _col5 (type: tinyint), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: double), _col12 (type: struct), _col13 (type: struct), _col14 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: stddev_pop(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), min(VALUE._col3), var_samp(VALUE._col4), var_pop(VALUE._col5), avg(VALUE._col6), var_samp(VALUE._col7), avg(VALUE._col8), min(VALUE._col9), var_pop(VALUE._col10), stddev_pop(VALUE._col11), sum(VALUE._col12) @@ -2213,16 +2698,33 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: double), KEY.reducesinkkey4 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey4 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: tinyint), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey13 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: double), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: double), KEY.reducesinkkey22 (type: double), KEY.reducesinkkey23 (type: double), KEY.reducesinkkey24 (type: double), KEY.reducesinkkey25 (type: double), KEY.reducesinkkey26 (type: double), KEY.reducesinkkey27 (type: tinyint), KEY.reducesinkkey28 (type: double), KEY.reducesinkkey29 (type: double), KEY.reducesinkkey30 (type: double), KEY.reducesinkkey31 (type: double), KEY.reducesinkkey32 (type: decimal(8,6)), KEY.reducesinkkey33 (type: double), KEY.reducesinkkey34 (type: bigint), KEY.reducesinkkey35 (type: double), KEY.reducesinkkey36 (type: bigint), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey38 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 8, 38] Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 50 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2399,7 +2901,8 @@ POSTHOOK: Input: default@alltypesorc 1969-12-31 15:59:46.82 NULL NULL NULL NULL 15601.0 NULL NULL 1 -1 NULL -46 NULL NULL 0.0 NULL NULL NULL 0.0 6.522017819370554E-4 NULL 0.0 NULL NULL -46.0 NULL 6.522017819364598E-4 46 15601.0 0.0 NULL NULL -0.571304 0.0 NULL NULL 1 1 NULL 1969-12-31 15:59:46.847 NULL NULL NULL NULL -7196.0 NULL NULL 1 -1 NULL -26 NULL NULL 0.0 NULL NULL NULL 0.0 -0.0014139799888827128 NULL 0.0 NULL NULL -26.0 NULL 0.001413979988882123 26 -7196.0 0.0 NULL NULL -1.010769 0.0 NULL NULL 1 1 NULL 1969-12-31 15:59:46.915 NULL NULL NULL NULL -200.0 NULL NULL 1 -1 NULL -25 NULL NULL 0.0 NULL NULL NULL 0.0 -0.050875000000000004 NULL 0.0 NULL NULL -25.0 NULL 0.0 25 -200.0 0.0 NULL NULL -1.051200 0.0 NULL NULL 1 1 NULL -PREHOOK: query: EXPLAIN SELECT cboolean1, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cboolean1, MAX(cfloat), (-(MAX(cfloat))), (-26.28 / MAX(cfloat)), @@ -2439,7 +2942,8 @@ WHERE (((cboolean1 IS NOT NULL)) 
GROUP BY cboolean1 ORDER BY cboolean1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cboolean1, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cboolean1, MAX(cfloat), (-(MAX(cfloat))), (-26.28 / MAX(cfloat)), @@ -2479,6 +2983,10 @@ WHERE (((cboolean1 IS NOT NULL)) GROUP BY cboolean1 ORDER BY cboolean1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2496,15 +3004,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 1) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 11, col 10) -> boolean, FilterDecimalColLessEqualDecimalScalar(col 13, val -863.257)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2, val -257) -> boolean, SelectColumnIsNotNull(col 6) -> boolean, FilterLongColGreaterEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterStringColRegExpStringScalar(col 7, pattern b) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 1, col 0)(children: col 0) -> boolean, SelectColumnIsNull(col 9) -> boolean) -> boolean) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean predicate: ((((cdouble < UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (CAST( cbigint AS decimal(22,3)) <= -863.257)) or ((cint >= -257) and cstring1 is not null and (cboolean1 >= 1)) or cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null)) and cboolean1 is not null) (type: boolean) Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cfloat (type: float), cbigint (type: bigint), cint (type: int), cdouble (type: double), ctinyint (type: tinyint), csmallint (type: smallint) outputColumnNames: cboolean1, cfloat, cbigint, cint, cdouble, ctinyint, csmallint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 4, 3, 2, 5, 0, 1] Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(cfloat), sum(cbigint), var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint) + Group By Vectorization: + aggregators: VectorUDAFMaxDouble(col 4) -> float, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFAvgDouble(col 5) -> struct, VectorUDAFMinLong(col 3) -> bigint, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFSumLong(col 2) -> bigint, VectorUDAFStdSampLong(col 0) -> struct, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFAvgLong(col 2) -> struct + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 10 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT 
requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false keys: cboolean1 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -2516,7 +3043,20 @@ STAGE PLANS: Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: struct), _col4 (type: struct), _col5 (type: bigint), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF var_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), var_samp(VALUE._col2), avg(VALUE._col3), min(VALUE._col4), var_pop(VALUE._col5), sum(VALUE._col6), stddev_samp(VALUE._col7), stddev_pop(VALUE._col8), avg(VALUE._col9) @@ -2535,13 +3075,27 @@ STAGE PLANS: value expressions: _col1 (type: float), _col2 (type: float), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(23,3)), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: float), _col14 (type: double), _col15 (type: double), _col17 (type: bigint), _col18 (type: double), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,3)), _col21 (type: double), _col22 (type: decimal(25,3)), _col23 (type: double), _col24 (type: double), _col25 (type: double) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: decimal(23,3)), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: float), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col12 (type: float), VALUE._col15 (type: bigint), VALUE._col16 (type: double), VALUE._col17 (type: decimal(24,3)), VALUE._col18 (type: decimal(25,3)), 
VALUE._col19 (type: double), VALUE._col20 (type: decimal(25,3)), VALUE._col21 (type: double), VALUE._col22 (type: double), VALUE._col23 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 13, 16, 17, 18, 19, 20, 21, 22, 23, 24] Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2648,12 +3202,16 @@ POSTHOOK: query: create table test_count(i int) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_count -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(*) from test_count PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(*) from test_count POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2670,28 +3228,70 @@ STAGE PLANS: TableScan alias: test_count Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: 
count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2713,12 +3313,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_count #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(i) from test_count PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(i) from test_count POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2735,30 +3339,72 @@ STAGE PLANS: TableScan alias: test_count Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: i (type: int) outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count(i) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: 
false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2854,12 +3500,16 @@ POSTHOOK: Lineage: alltypesnullorc.cstring2 SIMPLE [(alltypesnull)alltypesnull.F POSTHOOK: Lineage: alltypesnullorc.ctimestamp1 SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesnullorc.ctimestamp2 SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesnullorc.ctinyint SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(*) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(*) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2876,28 +3526,70 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2919,12 +3611,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 12288 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(ctinyint) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(ctinyint) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2941,30 +3637,72 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE 
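
Unlike the variance aggregates, the count queries in this file vectorize end to end: count(*) maps to VectorUDAFCountStar (no column is read), count(col) maps to VectorUDAFCount (NULL values are skipped), and the partials are combined with VectorUDAFCountMerge; all three produce a primitive bigint, which is why both the map and reduce sides report vectorized: true here. A sketch of the pair of queries being contrasted, assuming the all-NULL ORC table created earlier in this file:

-- count(*) counts rows while count(ctinyint) counts non-NULL values,
-- so on the all-NULL table the two results differ (12288 vs 0)
EXPLAIN VECTORIZATION EXPRESSION SELECT count(*) FROM alltypesnullorc;
EXPLAIN VECTORIZATION EXPRESSION SELECT count(ctinyint) FROM alltypesnullorc;
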
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2986,12 +3724,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(cint) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(cint) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3008,30 +3750,72 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cint (type: int) outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cint) + Group By Vectorization: + aggregators: VectorUDAFCount(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3053,12 +3837,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select 
count(cfloat) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(cfloat) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3075,30 +3863,72 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cfloat) + Group By Vectorization: + aggregators: VectorUDAFCount(col 4) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3120,12 +3950,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(cstring1) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(cstring1) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3142,30 +3976,72 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cstring1 (type: string) outputColumnNames: cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [6] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cstring1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 6) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3187,12 +4063,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(cboolean1) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(cboolean1) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3209,30 +4089,72 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: 
COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cboolean1 (type: boolean) outputColumnNames: cboolean1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cboolean1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 10) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out index a12ac05..7bb9284 100644 --- ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out @@ -94,12 +94,16 @@ POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@vsmb_bucket_txt POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select 
/*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -114,9 +118,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -124,19 +135,37 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -158,12 +187,16 @@ POSTHOOK: Input: default@vsmb_bucket_2 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -178,9 +211,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -188,19 +228,37 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -222,22 +280,16 @@ POSTHOOK: Input: default@vsmb_bucket_rc 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileformat out-of-the-box --- explain --- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; --- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; - -explain +PREHOOK: query: explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileformat out-of-the-box --- explain --- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; --- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; - -explain +POSTHOOK: query: explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: 
Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -252,9 +304,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -262,19 +321,37 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vectorized_case.q.out ql/src/test/results/clientpositive/spark/vectorized_case.q.out index c06ea94..67ed3dd 100644 --- ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select csmallint, case @@ -16,7 +16,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select csmallint, case @@ -34,6 +34,10 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -48,21 +52,44 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + 
projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 14, 15] + selectExpressions: VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:string, VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 15:string Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -112,7 +139,7 @@ POSTHOOK: Input: default@alltypesorc 10583 c c 418 a a 12205 b b -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select csmallint, case @@ -130,7 +157,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select csmallint, case @@ -148,6 +175,10 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -162,21 +193,44 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 
10583) -> boolean) -> boolean predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 14, 15] + selectExpressions: VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:string, VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 15:string Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out index d605185..030a71b 100644 --- ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out @@ -1,11 +1,15 @@ -PREHOOK: query: EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -21,18 +25,40 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) Statistics: Num rows: 
12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -47,12 +73,23 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -60,6 +97,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -67,9 +108,21 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 2, 12] + selectExpressions: LongColAddLongColumn(col 2, col 2) -> 12:long Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) + Group By Vectorization: + aggregators: VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxLong(col 2) -> int, VectorUDAFMinLong(col 2) -> int, VectorUDAFAvgLong(col 12) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 12) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE @@ -78,9 +131,22 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + 
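
The map join above is the one join in this diff that vectorizes natively: its nativeConditionsMet list shows that with hive.mapjoin.optimized.hashtable and hive.vectorized.execution.mapjoin.native.enabled both true, a single-condition inner join on a long key selects the specialized VectorMapJoinInnerBigOnlyLongOperator instead of the generic, non-native vector map join. A sketch of the two knobs that condition list checks, assuming the defaults these tests run with:

-- if either flag is false, the plan falls back to the non-native
-- VectorMapJoinOperator and the Map Join Vectorization entry loses native: true
SET hive.mapjoin.optimized.hashtable=true;
SET hive.vectorized.execution.mapjoin.native.enabled=true;
EXPLAIN VECTORIZATION EXPRESSION
SELECT COUNT(t1.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint;
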
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) diff --git ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out index da862b9..90ef576 100644 --- ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select cdouble ,Round(cdouble, 2) @@ -50,7 +50,7 @@ where cbigint % 500 = 0 and sin(cfloat) >= -1.0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select cdouble ,Round(cdouble, 2) @@ -102,22 +102,68 @@ where cbigint % 500 = 0 and sin(cfloat) >= -1.0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 3, val 500) -> 12:long) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 13, val -1.0)(children: FuncSinDoubleToDouble(col 4) -> 13:double) -> boolean) -> boolean + predicate: (((cbigint % 500) = 0) and (sin(cfloat) >= -1.0)) (type: boolean) + Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cdouble (type: double), round(cdouble, 2) (type: double), floor(cdouble) (type: bigint), ceil(cdouble) (type: bigint), rand() (type: double), rand(98007) (type: double), exp(ln(cdouble)) (type: double), ln(cdouble) (type: double), ln(cfloat) (type: double), log10(cdouble) (type: double), log2(cdouble) (type: double), log2((cdouble - 15601.0)) (type: double), log2(cfloat) (type: double), log2(cbigint) (type: double), log2(cint) (type: double), log2(csmallint) (type: double), log2(ctinyint) (type: double), log(2, cdouble) (type: double), power(log2(cdouble), 2) (type: double), power(log2(cdouble), 2) (type: double), sqrt(cdouble) (type: double), sqrt(cbigint) (type: double), bin(cbigint) (type: string), hex(cdouble) (type: string), conv(cbigint, 10, 16) (type: string), abs(cdouble) (type: double), abs(ctinyint) (type: int), (cint pmod 3) (type: int), sin(cdouble) (type: double), asin(cdouble) (type: 
double), cos(cdouble) (type: double), acos(cdouble) (type: double), atan(cdouble) (type: double), degrees(cdouble) (type: double), radians(cdouble) (type: double), cdouble (type: double), cbigint (type: bigint), (- cdouble) (type: double), sign(cdouble) (type: double), sign(cbigint) (type: double), cos(((- sin(log(cdouble))) + 3.14159)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 13, 12, 14, 15, 16, 18, 17, 19, 20, 21, 23, 22, 24, 25, 26, 27, 28, 30, 31, 29, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 5, 3, 46, 47, 48, 49] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 5, decimalPlaces 2) -> 13:double, FuncFloorDoubleToLong(col 5) -> 12:long, FuncCeilDoubleToLong(col 5) -> 14:long, FuncRandNoSeed -> 15:double, FuncRand -> 16:double, FuncExpDoubleToDouble(col 17)(children: FuncLnDoubleToDouble(col 5) -> 17:double) -> 18:double, FuncLnDoubleToDouble(col 5) -> 17:double, FuncLnDoubleToDouble(col 4) -> 19:double, FuncLog10DoubleToDouble(col 5) -> 20:double, FuncLog2DoubleToDouble(col 5) -> 21:double, FuncLog2DoubleToDouble(col 22)(children: DoubleColSubtractDoubleScalar(col 5, val 15601.0) -> 22:double) -> 23:double, FuncLog2DoubleToDouble(col 4) -> 22:double, FuncLog2LongToDouble(col 3) -> 24:double, FuncLog2LongToDouble(col 2) -> 25:double, FuncLog2LongToDouble(col 1) -> 26:double, FuncLog2LongToDouble(col 0) -> 27:double, VectorUDFAdaptor(log(2, cdouble)) -> 28:double, VectorUDFAdaptor(power(log2(cdouble), 2))(children: FuncLog2DoubleToDouble(col 5) -> 29:double) -> 30:double, VectorUDFAdaptor(power(log2(cdouble), 2))(children: FuncLog2DoubleToDouble(col 5) -> 29:double) -> 31:double, FuncSqrtDoubleToDouble(col 5) -> 29:double, FuncSqrtLongToDouble(col 3) -> 32:double, FuncBin(col 3) -> 33:String, VectorUDFAdaptor(hex(cdouble)) -> 34:string, VectorUDFAdaptor(conv(cbigint, 10, 16)) -> 35:string, FuncAbsDoubleToDouble(col 5) -> 36:double, FuncAbsLongToLong(col 0) -> 37:long, PosModLongToLong(col 2, divisor 3) -> 38:long, FuncSinDoubleToDouble(col 5) -> 39:double, FuncASinDoubleToDouble(col 5) -> 40:double, FuncCosDoubleToDouble(col 5) -> 41:double, FuncACosDoubleToDouble(col 5) -> 42:double, FuncATanDoubleToDouble(col 5) -> 43:double, FuncDegreesDoubleToDouble(col 5) -> 44:double, FuncRadiansDoubleToDouble(col 5) -> 45:double, DoubleColUnaryMinus(col 5) -> 46:double, FuncSignDoubleToDouble(col 5) -> 47:double, FuncSignLongToDouble(col 3) -> 48:double, FuncCosDoubleToDouble(col 50)(children: DoubleColAddDoubleScalar(col 49, val 3.14159)(children: DoubleColUnaryMinus(col 50)(children: FuncSinDoubleToDouble(col 49)(children: FuncLnDoubleToDouble(col 5) -> 49:double) -> 50:double) -> 49:double) -> 50:double) -> 49:double + Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: (((cbigint % 500) = 0) and (sin(cfloat) >= -1.0)) (type: boolean) - Select Operator - expressions: cdouble (type: double), round(cdouble, 2) (type: double), floor(cdouble) (type: bigint), ceil(cdouble) (type: bigint), rand() (type: double), rand(98007) (type: double), exp(ln(cdouble)) (type: double), ln(cdouble) (type: double), ln(cfloat) (type: double), log10(cdouble) (type: double), log2(cdouble) (type: double), log2((cdouble - 15601.0)) (type: double), log2(cfloat) (type: double), log2(cbigint) (type: double), log2(cint) (type: double), log2(csmallint) (type: double), log2(ctinyint) (type: double), log(2, cdouble) (type: double), power(log2(cdouble), 2) (type: double), power(log2(cdouble), 2) (type: double), sqrt(cdouble) (type: double), sqrt(cbigint) (type: double), bin(cbigint) (type: string), hex(cdouble) (type: string), conv(cbigint, 10, 16) (type: string), abs(cdouble) (type: double), abs(ctinyint) (type: int), (cint pmod 3) (type: int), sin(cdouble) (type: double), asin(cdouble) (type: double), cos(cdouble) (type: double), acos(cdouble) (type: double), atan(cdouble) (type: double), degrees(cdouble) (type: double), radians(cdouble) (type: double), cdouble (type: double), cbigint (type: bigint), (- cdouble) (type: double), sign(cdouble) (type: double), sign(cbigint) (type: double), cos(((- sin(log(cdouble))) + 3.14159)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40 - ListSink + ListSink PREHOOK: query: select cdouble diff --git ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out index 9cfd789..996021f 100644 --- ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out @@ -1,7 +1,11 @@ -PREHOOK: query: explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint +PREHOOK: query: explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint PREHOOK: type: QUERY -POSTHOOK: query: explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint +POSTHOOK: query: explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -29,6 +33,14 @@ STAGE PLANS: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Map 4 @@ -48,6 +60,14 @@ STAGE PLANS: 0 _col0 (type: smallint) 1 _col0 (type: smallint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -103,10 +123,25 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index 59d0acb..d4c2228 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -104,7 +104,7 @@ POSTHOOK: Lineage: part_orc.p_partkey SIMPLE [(part_staging)part_staging.FieldSc POSTHOOK: Lineage: part_orc.p_retailprice SIMPLE [(part_staging)part_staging.FieldSchema(name:p_retailprice, type:double, comment:null), ] POSTHOOK: Lineage: part_orc.p_size SIMPLE [(part_staging)part_staging.FieldSchema(name:p_size, type:int, comment:null), ] POSTHOOK: Lineage: part_orc.p_type SIMPLE [(part_staging)part_staging.FieldSchema(name:p_type, type:string, comment:null), ] -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -114,7 +114,7 @@ from noop(on part_orc order by p_name ) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -124,6 +124,10 @@ from noop(on part_orc order by p_name ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -152,6 +156,14 @@ STAGE PLANS: value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false 
Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -205,6 +217,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -235,6 +252,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -355,20 +377,24 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -402,6 +428,14 @@ STAGE PLANS: value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -472,6 +506,14 @@ STAGE PLANS: tag: 1 auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -525,6 +567,11 @@ STAGE 
PLANS: /part_orc [p2] Reducer 2 Needs Tagging: true + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tez doesn't use tagging + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -545,6 +592,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -575,6 +627,11 @@ STAGE PLANS: auto parallelism: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -678,18 +735,22 @@ Manufacturer#5 almond antique medium spring khaki 6 -25 Manufacturer#5 almond antique sky peru orange 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -717,6 +778,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -770,6 +839,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -862,7 +936,7 @@ Manufacturer#5 almond antique medium spring khaki 6 Manufacturer#5 almond antique sky peru orange 2 Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -872,7 +946,7 @@ from noop(on 
part_orc order by p_name ) abc PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -882,6 +956,10 @@ from noop(on part_orc order by p_name ) abc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -910,6 +988,14 @@ STAGE PLANS: value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -963,6 +1049,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -993,6 +1084,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -1113,7 +1209,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1123,7 +1219,7 @@ from noop(on part_orc order by p_name ) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1133,6 +1229,10 @@ from noop(on part_orc order by p_name ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1161,6 +1261,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1214,6 +1322,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -1244,6 +1357,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -1365,7 +1483,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 6 -25 Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1376,7 +1494,7 @@ from noop(on part_orc ) group by p_mfgr, p_name, p_size PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1387,6 +1505,10 @@ from noop(on part_orc ) group by p_mfgr, p_name, p_size POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1415,6 +1537,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1468,6 +1598,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -1506,6 +1641,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 
(type: int) @@ -1629,20 +1769,24 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 6 -25 Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select abc.* from noop(on part_orc partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select abc.* from noop(on part_orc partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1671,6 +1815,14 @@ STAGE PLANS: value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1741,6 +1893,14 @@ STAGE PLANS: tag: 1 auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1794,6 +1954,11 @@ STAGE PLANS: /part_orc [p1] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string) @@ -1828,6 +1993,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: true + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tez doesn't use tagging + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -1910,20 +2080,24 @@ POSTHOOK: Input: default@part_orc 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select abc.* from part_orc p1 
join noop(on part_orc partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1955,6 +2129,14 @@ STAGE PLANS: tag: 0 auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2022,6 +2204,14 @@ STAGE PLANS: value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2075,6 +2265,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: true + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tez doesn't use tagging + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -2112,6 +2307,11 @@ STAGE PLANS: MultiFileSpray: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string) @@ -2195,20 +2395,24 @@ POSTHOOK: Input: default@part_orc 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc partition by p_mfgr order by p_name, p_size desc) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc partition by p_mfgr 
order by p_name, p_size desc) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2251,6 +2455,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2304,6 +2514,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) @@ -2334,6 +2549,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) @@ -2435,7 +2655,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 Manufacturer#5 almond antique sky peru orange 2 3 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon midnight 23 5 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2444,7 +2664,7 @@ from noopwithmap(on part_orc partition by p_mfgr order by p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2453,6 +2673,10 @@ from noopwithmap(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2496,6 +2720,12 @@ STAGE PLANS: tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2549,6 +2779,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator 
(PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -2580,6 +2815,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -2698,7 +2938,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2707,7 +2947,7 @@ from noop(on part_orc partition by p_mfgr order by p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2716,6 +2956,10 @@ from noop(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2744,6 +2988,14 @@ STAGE PLANS: value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2797,6 +3049,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -2827,6 +3084,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -2945,7 +3207,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine 
dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2955,7 +3217,7 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2965,6 +3227,10 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2994,6 +3260,14 @@ STAGE PLANS: value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3047,6 +3321,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3100,6 +3379,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3138,6 +3422,11 @@ STAGE PLANS: auto parallelism: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3258,7 +3547,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -3271,7 +3560,7 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 PREHOOK: type: QUERY 
-POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -3284,6 +3573,10 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3312,6 +3605,14 @@ STAGE PLANS: value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3365,6 +3666,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3395,6 +3701,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3513,7 +3824,7 @@ Manufacturer#5 almond antique medium spring khaki 2 6208.18 Manufacturer#5 almond antique sky peru orange 3 7672.66 Manufacturer#5 almond aquamarine dodger light gainsboro 4 5882.97 Manufacturer#5 almond azure blanched chiffon midnight 5 4271.31 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -3525,7 +3836,7 @@ partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -3537,6 +3848,10 @@ partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3566,6 +3881,14 @@ STAGE PLANS: value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3636,6 +3959,14 @@ STAGE PLANS: tag: 1 auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3689,6 +4020,11 @@ STAGE PLANS: /part_orc [p1] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3723,6 +4059,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: true + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tez doesn't use tagging + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -3743,6 +4084,11 @@ STAGE PLANS: auto parallelism: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3882,18 +4228,22 @@ Manufacturer#5 almond antique medium spring khaki 2 2 2 1611.66 3401.35 6 -25 Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3922,6 +4272,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3975,6 +4333,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce 
Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -4014,6 +4377,13 @@ STAGE PLANS: Reducer 3 Execution mode: vectorized Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) @@ -4109,7 +4479,7 @@ POSTHOOK: Output: default@mfgr_price_view POSTHOOK: Lineage: mfgr_price_view.p_brand SIMPLE [(part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), ] POSTHOOK: Lineage: mfgr_price_view.p_mfgr SIMPLE [(part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), ] POSTHOOK: Lineage: mfgr_price_view.s EXPRESSION [(part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), ] -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_brand, s, round(sum(s) over w1,2) as s1 from noop(on mfgr_price_view @@ -4117,7 +4487,7 @@ partition by p_mfgr order by p_mfgr) window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_brand, s, round(sum(s) over w1,2) as s1 from noop(on mfgr_price_view @@ -4125,6 +4495,10 @@ partition by p_mfgr order by p_mfgr) window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4163,6 +4537,14 @@ STAGE PLANS: value expressions: _col2 (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4216,6 +4598,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -4252,6 +4639,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) @@ -4390,7 +4782,7 @@ fv1 INT) 
POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@part_5 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended from noop(on part_orc partition by p_mfgr order by p_name) @@ -4406,7 +4798,7 @@ cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, first_value(p_size, true) over w1 as fv1 window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended from noop(on part_orc partition by p_mfgr order by p_name) @@ -4422,6 +4814,10 @@ cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, first_value(p_size, true) over w1 as fv1 window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -4456,6 +4852,14 @@ STAGE PLANS: value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4509,6 +4913,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -4586,6 +4995,11 @@ STAGE PLANS: MultiFileSpray: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) @@ -4626,6 +5040,11 @@ STAGE PLANS: auto parallelism: false Reducer 5 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -4710,6 +5129,11 @@ STAGE PLANS: MultiFileSpray: false Reducer 6 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 
(type: double) @@ -4738,6 +5162,11 @@ STAGE PLANS: auto parallelism: false Reducer 7 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -4947,7 +5376,7 @@ Manufacturer#5 almond antique medium spring khaki 6 8 2 2 0.4 31 Manufacturer#5 almond antique sky peru orange 2 2 3 3 0.6 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 4 4 0.8 6 Manufacturer#5 almond azure blanched chiffon midnight 23 23 5 5 1.0 2 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -4964,7 +5393,7 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -4981,6 +5410,10 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5010,6 +5443,14 @@ STAGE PLANS: value expressions: p_name (type: string), p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5063,6 +5504,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5123,6 +5569,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5161,6 +5612,11 @@ STAGE PLANS: auto parallelism: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5295,7 
+5751,7 @@ Manufacturer#5 almond antique medium spring khaki 1 1 6 6 Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5312,7 +5768,7 @@ from noop(on partition by p_mfgr order by p_mfgr ) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5329,6 +5785,10 @@ from noop(on partition by p_mfgr order by p_mfgr ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5359,6 +5819,14 @@ STAGE PLANS: value expressions: p_name (type: string), p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5412,6 +5880,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5449,6 +5922,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5479,6 +5957,11 @@ STAGE PLANS: auto parallelism: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5509,6 +5992,11 @@ STAGE PLANS: auto parallelism: false Reducer 5 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5643,7 +6131,7 @@ Manufacturer#5 almond antique medium spring khaki 2 2 6 37 
Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5658,7 +6146,7 @@ from noop(on partition by p_mfgr order by p_mfgr)) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5673,6 +6161,10 @@ from noop(on partition by p_mfgr order by p_mfgr)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5702,6 +6194,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5755,6 +6255,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5792,6 +6297,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5829,6 +6339,11 @@ STAGE PLANS: auto parallelism: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5959,7 +6474,7 @@ Manufacturer#5 almond antique medium spring khaki 2 2 6 37 Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -5976,7 +6491,7 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain 
vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -5993,6 +6508,10 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6023,6 +6542,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6076,6 +6603,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6113,6 +6645,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -6159,6 +6696,11 @@ STAGE PLANS: auto parallelism: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6190,6 +6732,11 @@ STAGE PLANS: auto parallelism: false Reducer 5 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6324,7 +6871,7 @@ Manufacturer#5 almond antique medium spring khaki 1 1 6 6 Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -6340,7 +6887,7 @@ from noop(on order by p_mfgr )) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by 
p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -6356,6 +6903,10 @@ from noop(on order by p_mfgr )) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6385,6 +6936,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6438,6 +6997,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6491,6 +7055,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -6529,6 +7098,11 @@ STAGE PLANS: auto parallelism: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6661,7 +7235,7 @@ Manufacturer#5 almond antique medium spring khaki 1 1 6 6 6 Manufacturer#5 almond antique sky peru orange 1 1 2 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 23 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6675,7 +7249,7 @@ from noopwithmap(on order by p_mfgr, p_name) )) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6689,6 +7263,10 @@ from noopwithmap(on order by p_mfgr, p_name) )) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6718,6 +7296,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6771,6 +7357,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6824,6 +7415,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6855,6 +7451,11 @@ STAGE PLANS: auto parallelism: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) diff --git ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out index 97f12d4..5930057 100644 --- ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out @@ -1,11 +1,15 @@ -PREHOOK: query: EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -24,38 +28,91 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select 
Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tez doesn't use tagging + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -79,6 +136,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Reducer 3 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + 
notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) @@ -92,13 +154,27 @@ STAGE PLANS: value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double) Reducer 4 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out index 560235d..26aab1c 100644 --- ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization select substr(cstring1, 1, 2) ,substr(cstring1, 2) @@ -20,7 +20,7 @@ where cbigint % 237 = 0 and length(substr(cstring1, 1, 2)) <= 2 and cstring1 like '%' PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select substr(cstring1, 1, 2) ,substr(cstring1, 2) @@ -42,22 +42,53 @@ where cbigint % 237 = 0 and length(substr(cstring1, 1, 2)) <= 2 and cstring1 like '%' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((cbigint % 237) = 0) and (length(substr(cstring1, 1, 2)) <= 2) and (cstring1 like '%')) (type: boolean) + Statistics: Num rows: 1024 Data size: 31436 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: substr(cstring1, 1, 2) (type: string), substr(cstring1, 2) (type: string), lower(cstring1) (type: string), upper(cstring1) (type: string), upper(cstring1) (type: string), length(cstring1) (type: int), trim(cstring1) (type: string), ltrim(cstring1) (type: string), rtrim(cstring1) (type: string), concat(cstring1, cstring2) (type: string), concat('>', cstring1) (type: string), concat(cstring1, '<') (type: string), concat(substr(cstring1, 1, 2), substr(cstring2, 1, 2)) (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1024 Data size: 
31436 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1024 Data size: 31436 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: (((cbigint % 237) = 0) and (length(substr(cstring1, 1, 2)) <= 2) and (cstring1 like '%')) (type: boolean) - Select Operator - expressions: substr(cstring1, 1, 2) (type: string), substr(cstring1, 2) (type: string), lower(cstring1) (type: string), upper(cstring1) (type: string), upper(cstring1) (type: string), length(cstring1) (type: int), trim(cstring1) (type: string), ltrim(cstring1) (type: string), rtrim(cstring1) (type: string), concat(cstring1, cstring2) (type: string), concat('>', cstring1) (type: string), concat(cstring1, '<') (type: string), concat(substr(cstring1, 1, 2), substr(cstring2, 1, 2)) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - ListSink + ListSink PREHOOK: query: select substr(cstring1, 1, 2) diff --git ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out index 9948112..d8d4466 100644 --- ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out @@ -63,7 +63,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@alltypesorc_wrong POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -76,7 +76,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -89,6 +89,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -105,25 +109,60 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + 
className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: VectorUDFUnixTimeStampTimestamp(col 0) -> 2:long, VectorUDFYearTimestamp(col 0, field YEAR) -> 3:long, VectorUDFMonthTimestamp(col 0, field MONTH) -> 4:long, VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 5:long, VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 6:long, VectorUDFWeekOfYearTimestamp(col 0, field WEEK_OF_YEAR) -> 7:long, VectorUDFHourTimestamp(col 0, field HOUR_OF_DAY) -> 8:long, VectorUDFMinuteTimestamp(col 0, field MINUTE) -> 9:long, VectorUDFSecondTimestamp(col 0, field SECOND) -> 10:long Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -206,7 +245,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -219,7 +258,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, 
year(stimestamp1), month(stimestamp1), @@ -232,6 +271,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -248,25 +291,60 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: VectorUDFUnixTimeStampString(col 1) -> 2:long, VectorUDFYearString(col 1, fieldStart 0, fieldLength 4) -> 3:long, VectorUDFMonthString(col 1, fieldStart 5, fieldLength 2) -> 4:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 5:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 6:long, VectorUDFWeekOfYearString(col 1) -> 7:long, VectorUDFHourString(col 1, fieldStart 11, fieldLength 2) -> 8:long, VectorUDFMinuteString(col 1, fieldStart 14, fieldLength 2) -> 9:long, VectorUDFSecondString(col 1, fieldStart 17, fieldLength 2) -> 10:long Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + 
className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -349,7 +427,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -362,7 +440,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -375,6 +453,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -391,25 +473,60 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(stimestamp1)) (type: boolean), (month(ctimestamp1) = month(stimestamp1)) (type: boolean), (day(ctimestamp1) = day(stimestamp1)) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(stimestamp1)) (type: boolean), (hour(ctimestamp1) = hour(stimestamp1)) (type: boolean), (minute(ctimestamp1) = minute(stimestamp1)) (type: boolean), (second(ctimestamp1) = second(stimestamp1)) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 6, 7, 8, 9, 10, 11, 12] + selectExpressions: LongColEqualLongColumn(col 2, col 3)(children: VectorUDFUnixTimeStampTimestamp(col 0) -> 2:long, VectorUDFUnixTimeStampString(col 1) -> 3:long) -> 4:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFYearTimestamp(col 0, field YEAR) -> 2:long, VectorUDFYearString(col 1, fieldStart 0, fieldLength 4) -> 3:long) -> 5:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMonthTimestamp(col 0, field MONTH) -> 2:long, VectorUDFMonthString(col 1, fieldStart 5, fieldLength 2) -> 3:long) -> 6:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 3:long) -> 7:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 3:long) -> 8:long, LongColEqualLongColumn(col 2, col 3)(children: 
VectorUDFWeekOfYearTimestamp(col 0, field WEEK_OF_YEAR) -> 2:long, VectorUDFWeekOfYearString(col 1) -> 3:long) -> 9:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFHourTimestamp(col 0, field HOUR_OF_DAY) -> 2:long, VectorUDFHourString(col 1, fieldStart 11, fieldLength 2) -> 3:long) -> 10:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMinuteTimestamp(col 0, field MINUTE) -> 2:long, VectorUDFMinuteString(col 1, fieldStart 14, fieldLength 2) -> 3:long) -> 11:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFSecondTimestamp(col 0, field SECOND) -> 2:long, VectorUDFSecondString(col 1, fieldStart 17, fieldLength 2) -> 3:long) -> 12:long Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -492,7 +609,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -505,7 +622,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_wrong ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: 
query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -518,6 +635,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_wrong ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -534,25 +655,60 @@ STAGE PLANS: TableScan alias: alltypesorc_wrong Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9] + selectExpressions: VectorUDFUnixTimeStampString(col 0) -> 1:long, VectorUDFYearString(col 0, fieldStart 0, fieldLength 4) -> 2:long, VectorUDFMonthString(col 0, fieldStart 5, fieldLength 2) -> 3:long, VectorUDFDayOfMonthString(col 0, fieldStart 8, fieldLength 2) -> 4:long, VectorUDFDayOfMonthString(col 0, fieldStart 8, fieldLength 2) -> 5:long, VectorUDFWeekOfYearString(col 0) -> 6:long, VectorUDFHourString(col 0, fieldStart 11, fieldLength 2) -> 7:long, VectorUDFMinuteString(col 0, fieldStart 14, fieldLength 2) -> 8:long, VectorUDFSecondString(col 0, fieldStart 17, fieldLength 2) -> 9:long Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) outputColumnNames: _col0, _col1, 
_col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -598,20 +754,24 @@ POSTHOOK: Input: default@alltypesorc_wrong NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count(*) FROM alltypesorc_string PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count(*) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -628,30 +788,72 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: ctimestamp1 (type: timestamp) outputColumnNames: ctimestamp1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() + Group By Vectorization: + aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 0) -> timestamp, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + 
allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 1) -> timestamp, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -683,14 +885,18 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### NULL NULL 0 40 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -720,20 +926,47 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Aggregation Function expression for GROUPBY operator: Vectorization of aggreation should have succeeded org.apache.hadoop.hive.ql.metadata.HiveException: Vector aggregate not implemented: "sum" for type: "TIMESTAMP (UDAF evaluator mode = PARTIAL1) + vectorized: false Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 0) -> double + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: round(_col0, 3) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 0, decimalPlaces 3) -> 1:double Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -759,7 +992,7 @@ POSTHOOK: type: QUERY POSTHOOK: 
Input: default@alltypesorc_string #### A masked pattern was here #### NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -770,7 +1003,7 @@ PREHOOK: query: EXPLAIN SELECT round(stddev_samp(ctimestamp1), 3) FROM alltypesorc_string PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -781,6 +1014,10 @@ POSTHOOK: query: EXPLAIN SELECT round(stddev_samp(ctimestamp1), 3) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -797,12 +1034,26 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: ctimestamp1 (type: timestamp) outputColumnNames: ctimestamp1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1) + Group By Vectorization: + aggregators: VectorUDAFAvgTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarSampTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdSampTimestamp(col 0) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE @@ -811,7 +1062,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 
(type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) diff --git ql/src/test/results/clientpositive/tez/vector_acid3.q.out ql/src/test/results/clientpositive/tez/vector_acid3.q.out new file mode 100644 index 0000000..bb4012e --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_acid3.q.out @@ -0,0 +1,29 @@ +PREHOOK: query: drop table if exists testacid1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists testacid1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table testacid1(id int) clustered by (id) into 2 buckets stored as orc tblproperties("transactional"="true") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@testacid1 +POSTHOOK: query: create table testacid1(id int) clustered by (id) into 2 buckets stored as orc tblproperties("transactional"="true") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testacid1 +PREHOOK: query: insert into table testacid1 values (1),(2),(3),(4) +PREHOOK: type: QUERY +PREHOOK: Output: default@testacid1 +POSTHOOK: query: insert into table testacid1 values (1),(2),(3),(4) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@testacid1 +POSTHOOK: Lineage: testacid1.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +hive.vectorized.execution.enabled=true +PREHOOK: query: select count(1) from testacid1 +PREHOOK: type: QUERY +PREHOOK: Input: default@testacid1 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from testacid1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testacid1 +#### A masked pattern was here #### +4 diff --git ql/src/test/results/clientpositive/tez/vector_adaptor_usage_mode.q.out ql/src/test/results/clientpositive/tez/vector_adaptor_usage_mode.q.out new file mode 100644 index 0000000..b9875c5 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_adaptor_usage_mode.q.out @@ -0,0 +1,706 @@ +PREHOOK: query: drop table varchar_udf_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_udf_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@varchar_udf_1 +POSTHOOK: query: create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@varchar_udf_1 +PREHOOK: query: insert overwrite table varchar_udf_1 + select key, value, 
key, value from src where key = '238' limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@varchar_udf_1 +POSTHOOK: query: insert overwrite table varchar_udf_1 + select key, value, key, value from src where key = '238' limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@varchar_udf_1 +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DROP TABLE IF EXISTS DECIMAL_UDF_txt +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS DECIMAL_UDF_txt +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS DECIMAL_UDF +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS DECIMAL_UDF +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE DECIMAL_UDF_txt (key decimal(20,10), value int) +ROW FORMAT DELIMITED + FIELDS TERMINATED BY ' ' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DECIMAL_UDF_txt +POSTHOOK: query: CREATE TABLE DECIMAL_UDF_txt (key decimal(20,10), value int) +ROW FORMAT DELIMITED + FIELDS TERMINATED BY ' ' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DECIMAL_UDF_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv7.txt' INTO TABLE DECIMAL_UDF_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@decimal_udf_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv7.txt' INTO TABLE DECIMAL_UDF_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@decimal_udf_txt +PREHOOK: query: CREATE TABLE DECIMAL_UDF (key decimal(20,10), value int) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DECIMAL_UDF +POSTHOOK: query: CREATE TABLE DECIMAL_UDF (key decimal(20,10), value int) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DECIMAL_UDF +PREHOOK: query: INSERT OVERWRITE TABLE DECIMAL_UDF SELECT * FROM DECIMAL_UDF_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_udf_txt +PREHOOK: Output: default@decimal_udf +POSTHOOK: query: INSERT OVERWRITE TABLE DECIMAL_UDF SELECT * FROM DECIMAL_UDF_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_udf_txt +POSTHOOK: Output: default@decimal_udf +POSTHOOK: Lineage: decimal_udf.key SIMPLE [(decimal_udf_txt)decimal_udf_txt.FieldSchema(name:key, type:decimal(20,10), comment:null), ] +POSTHOOK: Lineage: decimal_udf.value SIMPLE [(decimal_udf_txt)decimal_udf_txt.FieldSchema(name:value, type:int, comment:null), ] +PREHOOK: query: drop table if exists count_case_groupby +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists count_case_groupby +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table count_case_groupby (key string, bool boolean) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@count_case_groupby +POSTHOOK: query: create table count_case_groupby (key string, bool boolean) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
database:default +POSTHOOK: Output: default@count_case_groupby +PREHOOK: query: insert into table count_case_groupby values ('key1', true),('key2', false),('key3', NULL),('key4', false),('key5',NULL) +PREHOOK: type: QUERY +PREHOOK: Output: default@count_case_groupby +POSTHOOK: query: insert into table count_case_groupby values ('key1', true),('key2', false),('key3', NULL),('key4', false),('key5',NULL) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@count_case_groupby +POSTHOOK: Lineage: count_case_groupby.bool EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: count_case_groupby.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: explain vectorization expression +select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Stage-0 + Fetch Operator + limit:1 + Stage-1 + Map 1 + File Output Operator [FS_3] + Limit [LIM_2] (rows=1 width=356) + Number of rows:1 + Select Operator [SEL_1] (rows=1 width=356) + Output:["_col0","_col1","_col2"] + TableScan [TS_0] (rows=1 width=356) + default@varchar_udf_1,varchar_udf_1,Tbl:COMPLETE,Col:NONE,Output:["c2","c4"] + +PREHOOK: query: select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +true true true +PREHOOK: query: explain vectorization expression +select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
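-- Annotation (not part of the golden output): regexp, regexp_extract and
-- regexp_replace have no dedicated vectorized expression classes, so they can
-- reach batch execution only through the VectorUDFAdaptor. The .q file
-- presumably re-runs each EXPLAIN under different adaptor settings ("set"
-- commands are not echoed into q.out files), e.g.:
--   set hive.vectorized.adaptor.usage.mode=none;
--   set hive.vectorized.adaptor.usage.mode=all;
-- which would account for otherwise-identical plans below differing only in
-- their "vectorized" tags.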
+ +Stage-0 + Fetch Operator + limit:1 + Stage-1 + Map 1 + File Output Operator [FS_3] + Limit [LIM_2] (rows=1 width=356) + Number of rows:1 + Select Operator [SEL_1] (rows=1 width=356) + Output:["_col0","_col1","_col2"] + TableScan [TS_0] (rows=1 width=356) + default@varchar_udf_1,varchar_udf_1,Tbl:COMPLETE,Col:NONE,Output:["c2","c4"] + +PREHOOK: query: select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +238 238 true +PREHOOK: query: explain vectorization expression +select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Stage-0 + Fetch Operator + limit:1 + Stage-1 + Map 1 + File Output Operator [FS_3] + Limit [LIM_2] (rows=1 width=356) + Number of rows:1 + Select Operator [SEL_1] (rows=1 width=356) + Output:["_col0","_col1","_col2"] + TableScan [TS_0] (rows=1 width=356) + default@varchar_udf_1,varchar_udf_1,Tbl:COMPLETE,Col:NONE,Output:["c2","c4"] + +PREHOOK: query: select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +replaced_238 replaced_238 true +PREHOOK: query: explain vectorization expression +select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Stage-0 + Fetch Operator + limit:1 + Stage-1 + Map 1 + File Output Operator [FS_3] + Limit [LIM_2] (rows=1 width=356) + Number of rows:1 + Select Operator [SEL_1] (rows=1 width=356) + Output:["_col0","_col1","_col2"] + TableScan [TS_0] (rows=1 width=356) + default@varchar_udf_1,varchar_udf_1,Tbl:COMPLETE,Col:NONE,Output:["c2","c4"] + +PREHOOK: query: select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +true true true +PREHOOK: query: explain vectorization expression +select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Stage-0 + Fetch Operator + limit:1 + Stage-1 + Map 1 vectorized + File Output Operator [FS_6] + Limit [LIM_5] (rows=1 width=356) + Number of rows:1 + Select Operator [SEL_4] (rows=1 width=356) + Output:["_col0","_col1","_col2"] + TableScan [TS_0] (rows=1 width=356) + default@varchar_udf_1,varchar_udf_1,Tbl:COMPLETE,Col:NONE,Output:["c2","c4"] + +PREHOOK: query: select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +238 238 true +PREHOOK: query: explain vectorization expression +select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
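-- Annotation: the regexp_extract re-plan below is the first in this file to
-- print "Map 1 vectorized"; the renumbered operator ids (FS_6/SEL_4 instead
-- of FS_3/SEL_1) come with the vectorized re-compile. Note that plain
-- "c2 regexp 'val'" stays non-vectorized in both of its plans above.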
+ +Stage-0 + Fetch Operator + limit:1 + Stage-1 + Map 1 vectorized + File Output Operator [FS_6] + Limit [LIM_5] (rows=1 width=356) + Number of rows:1 + Select Operator [SEL_4] (rows=1 width=356) + Output:["_col0","_col1","_col2"] + TableScan [TS_0] (rows=1 width=356) + default@varchar_udf_1,varchar_udf_1,Tbl:COMPLETE,Col:NONE,Output:["c2","c4"] + +PREHOOK: query: select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +replaced_238 replaced_238 true +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 + File Output Operator [FS_2] + Select Operator [SEL_1] (rows=38 width=113) + Output:["_col0"] + TableScan [TS_0] (rows=38 width=113) + default@decimal_udf,decimal_udf,Tbl:COMPLETE,Col:NONE,Output:["key"] + +PREHOOK: query: SELECT POWER(key, 2) FROM DECIMAL_UDF +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_udf +#### A masked pattern was here #### +POSTHOOK: query: SELECT POWER(key, 2) FROM DECIMAL_UDF +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_udf +#### A masked pattern was here #### +0.0 +0.0 +0.0 +0.010000000000000002 +0.04000000000000001 +0.09 +0.09 +0.10890000000000001 +0.10890000000000001 +0.11088900000000002 +0.11088900000000002 +1.0 +1.0 +1.0 +1.0E-4 +1.2544000000000002 +1.2544000000000002 +1.2544000000000002 +1.2588840000000003 +1.2588840000000003 +1.52415787532388352E18 +1.52415787532388352E18 +1.936E7 +100.0 +10000.0 +15376.0 +15675.04 +1576255.1401 +4.0 +4.0 +4.0E-4 +400.0 +40000.0 +9.8596 +9.8596 +9.8596 +9.8596 +NULL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF WHERE key = 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF WHERE key = 10 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 + File Output Operator [FS_3] + Select Operator [SEL_2] (rows=19 width=113) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_4] (rows=19 width=113) + predicate:(key = 10) + TableScan [TS_0] (rows=38 width=113) + default@decimal_udf,decimal_udf,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + +PREHOOK: query: SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF WHERE key = 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_udf +#### A masked pattern was here #### +POSTHOOK: query: SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF WHERE key = 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_udf +#### A masked pattern was here #### +22026.465794806718 2.302585092994046 2.302585092994046 1.0 1.0 1.0 1.0 3.1622776601683795 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 + File Output Operator [FS_2] + Select Operator [SEL_1] (rows=38 width=113) + Output:["_col0"] + TableScan [TS_0] (rows=38 width=113) + default@decimal_udf,decimal_udf,Tbl:COMPLETE,Col:NONE,Output:["key"] + +PREHOOK: query: SELECT POWER(key, 2) FROM DECIMAL_UDF +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_udf +#### A masked pattern was here #### +POSTHOOK: query: SELECT POWER(key, 2) FROM DECIMAL_UDF +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_udf +#### A masked pattern was here #### +0.0 +0.0 +0.0 +0.010000000000000002 +0.04000000000000001 +0.09 +0.09 +0.10890000000000001 +0.10890000000000001 +0.11088900000000002 +0.11088900000000002 +1.0 +1.0 +1.0 +1.0E-4 +1.2544000000000002 +1.2544000000000002 +1.2544000000000002 +1.2588840000000003 +1.2588840000000003 +1.52415787532388352E18 +1.52415787532388352E18 +1.936E7 +100.0 +10000.0 +15376.0 +15675.04 +1576255.1401 +4.0 +4.0 +4.0E-4 +400.0 +40000.0 +9.8596 +9.8596 +9.8596 +9.8596 +NULL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF WHERE key = 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF WHERE key = 10 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
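-- Annotation: the decimal(20,10) math queries behave differently from the
-- regexp ones: POWER and exp/ln/log/sqrt over DECIMAL_UDF.key print a plain
-- "Map 1" in both rounds, suggesting (an inference, not stated in the output)
-- that these decimal UDFs had no vectorized path here even via the adaptor.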
+ +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 + File Output Operator [FS_3] + Select Operator [SEL_2] (rows=19 width=113) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_4] (rows=19 width=113) + predicate:(key = 10) + TableScan [TS_0] (rows=38 width=113) + default@decimal_udf,decimal_udf,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + +PREHOOK: query: SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF WHERE key = 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_udf +#### A masked pattern was here #### +POSTHOOK: query: SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF WHERE key = 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_udf +#### A masked pattern was here #### +22026.465794806718 2.302585092994046 2.302585092994046 1.0 1.0 1.0 1.0 3.1622776601683795 +PREHOOK: query: explain vectorization expression +SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 vectorized + File Output Operator [FS_9] + Group By Operator [GBY_8] (rows=2 width=90) + Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_4] + PartitionCols:_col0 + Group By Operator [GBY_3] (rows=5 width=90) + Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 + Select Operator [SEL_1] (rows=5 width=90) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=5 width=90) + default@count_case_groupby,count_case_groupby,Tbl:COMPLETE,Col:NONE,Output:["key","bool"] + +PREHOOK: query: SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@count_case_groupby +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@count_case_groupby +#### A masked pattern was here #### +key1 1 +key2 1 +key3 0 +key4 1 +key5 0 +PREHOOK: query: explain vectorization expression +SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key +POSTHOOK: type: QUERY +Plan optimized by CBO. 
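-- Annotation: in the COUNT(CASE ...) plan above only "Reducer 2 vectorized"
-- is tagged; in the re-run below Map 1 gains the tag as well (GBY_9/SEL_8
-- replace GBY_3/SEL_1), consistent with the CASE expression vectorizing
-- through the VectorUDFAdaptor once the adaptor mode permits it.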
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 vectorized + File Output Operator [FS_12] + Group By Operator [GBY_11] (rows=2 width=90) + Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_10] + PartitionCols:_col0 + Group By Operator [GBY_9] (rows=5 width=90) + Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 + Select Operator [SEL_8] (rows=5 width=90) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=5 width=90) + default@count_case_groupby,count_case_groupby,Tbl:COMPLETE,Col:NONE,Output:["key","bool"] + +PREHOOK: query: SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@count_case_groupby +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@count_case_groupby +#### A masked pattern was here #### +key1 1 +key2 1 +key3 0 +key4 1 +key5 0 +PREHOOK: query: drop table varchar_udf_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_udf_1 +PREHOOK: Output: default@varchar_udf_1 +POSTHOOK: query: drop table varchar_udf_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_udf_1 +POSTHOOK: Output: default@varchar_udf_1 +PREHOOK: query: DROP TABLE DECIMAL_UDF_txt +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@decimal_udf_txt +PREHOOK: Output: default@decimal_udf_txt +POSTHOOK: query: DROP TABLE DECIMAL_UDF_txt +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@decimal_udf_txt +POSTHOOK: Output: default@decimal_udf_txt +PREHOOK: query: DROP TABLE DECIMAL_UDF +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@decimal_udf +PREHOOK: Output: default@decimal_udf +POSTHOOK: query: DROP TABLE DECIMAL_UDF +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@decimal_udf +POSTHOOK: Output: default@decimal_udf +PREHOOK: query: drop table count_case_groupby +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@count_case_groupby +PREHOOK: Output: default@count_case_groupby +POSTHOOK: query: drop table count_case_groupby +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@count_case_groupby +POSTHOOK: Output: default@count_case_groupby diff --git ql/src/test/results/clientpositive/tez/vector_aggregate_9.q.out ql/src/test/results/clientpositive/tez/vector_aggregate_9.q.out new file mode 100644 index 0000000..779c6ef --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_aggregate_9.q.out @@ -0,0 +1,205 @@ +PREHOOK: query: create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: create table vectortab2korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2korc +POSTHOOK: query: create table vectortab2korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2korc +PREHOOK: query: INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: default@vectortab2korc +POSTHOOK: query: INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: default@vectortab2korc +POSTHOOK: Lineage: vectortab2korc.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.bo SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.d SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.dc SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:dc, type:decimal(38,18), comment:null), ] +POSTHOOK: Lineage: vectortab2korc.dt SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:dt, type:date, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.f SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.s SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.s2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:s2, type:string, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] +PREHOOK: query: explain vectorization expression +select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 
<- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2korc + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + Select Operator + expressions: dc (type: decimal(38,18)) + outputColumnNames: dc + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [6] + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(dc), max(dc), sum(dc), avg(dc) + Group By Vectorization: + aggregators: VectorUDAFMinDecimal(col 6) -> decimal(38,18), VectorUDAFMaxDecimal(col 6) -> decimal(38,18), VectorUDAFSumDecimal(col 6) -> decimal(38,18), VectorUDAFAvgDecimal(col 6) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 6) -> struct output type STRUCT requires PRIMITIVE IS false + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: struct) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2korc +#### A masked pattern was here #### +POSTHOOK: query: select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2korc +#### A masked pattern was here #### +-4997414117561.546875000000000000 4994550248722.298828000000000000 -10252745435816.024410000000000000 -5399023399.587163986308583465 diff --git 
ql/src/test/results/clientpositive/tez/vector_aggregate_without_gby.q.out ql/src/test/results/clientpositive/tez/vector_aggregate_without_gby.q.out new file mode 100644 index 0000000..8a5687a --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_aggregate_without_gby.q.out @@ -0,0 +1,70 @@ +PREHOOK: query: create table testvec(id int, dt int, greg_dt string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@testvec +POSTHOOK: query: create table testvec(id int, dt int, greg_dt string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testvec +PREHOOK: query: insert into table testvec +values +(1,20150330, '2015-03-30'), +(2,20150301, '2015-03-01'), +(3,20150502, '2015-05-02'), +(4,20150401, '2015-04-01'), +(5,20150313, '2015-03-13'), +(6,20150314, '2015-03-14'), +(7,20150404, '2015-04-04') +PREHOOK: type: QUERY +PREHOOK: Output: default@testvec +POSTHOOK: query: insert into table testvec +values +(1,20150330, '2015-03-30'), +(2,20150301, '2015-03-01'), +(3,20150502, '2015-05-02'), +(4,20150401, '2015-04-01'), +(5,20150313, '2015-03-13'), +(6,20150314, '2015-03-14'), +(7,20150404, '2015-04-04') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@testvec +POSTHOOK: Lineage: testvec.dt EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: testvec.greg_dt SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: testvec.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: explain vectorization select max(dt), max(greg_dt) from testvec where id=5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization select max(dt), max(greg_dt) from testvec where id=5 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
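-- Annotation: a global aggregate (no GROUP BY) vectorizes end to end below:
-- both the map-side partial max()es and the single reducer fed over the
-- CUSTOM_SIMPLE_EDGE carry the "vectorized" tag. A minimal standalone repro
-- (illustrative only; table as created above):
--   set hive.vectorized.execution.enabled=true;
--   explain vectorization select max(dt), max(greg_dt) from testvec where id=5;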
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 vectorized + File Output Operator [FS_14] + Group By Operator [GBY_13] (rows=1 width=188) + Output:["_col0","_col1"],aggregations:["max(VALUE._col0)","max(VALUE._col1)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_12] + Group By Operator [GBY_11] (rows=1 width=188) + Output:["_col0","_col1"],aggregations:["max(dt)","max(greg_dt)"] + Select Operator [SEL_10] (rows=3 width=102) + Output:["dt","greg_dt"] + Filter Operator [FIL_9] (rows=3 width=102) + predicate:(id = 5) + TableScan [TS_0] (rows=7 width=102) + default@testvec,testvec,Tbl:COMPLETE,Col:NONE,Output:["id","dt","greg_dt"] + +PREHOOK: query: select max(dt), max(greg_dt) from testvec where id=5 +PREHOOK: type: QUERY +PREHOOK: Input: default@testvec +#### A masked pattern was here #### +POSTHOOK: query: select max(dt), max(greg_dt) from testvec where id=5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testvec +#### A masked pattern was here #### +20150313 2015-03-13 diff --git ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out new file mode 100644 index 0000000..1269ed8 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out @@ -0,0 +1,2315 @@ +PREHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl1 +POSTHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl1 +PREHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl2 +POSTHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl2 +PREHOOK: query: insert overwrite table tbl1 +select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl1 +POSTHOOK: query: insert overwrite table tbl1 +select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl1 +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table tbl2 +select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl2 +POSTHOOK: query: insert overwrite table tbl2 +select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl2 +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain vectorization expression +select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +PREHOOK: type: QUERY 
+POSTHOOK: query: explain vectorization expression +select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from ( + select 
a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +22 +PREHOOK: query: explain vectorization expression +select count(*) from +( + select key, count(*) from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count(*) from +( + select key, count(*) from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Statistics: Num rows: 5 Data size: 465 Basic stats: 
COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +( + select key, count(*) from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +( + select key, count(*) from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +6 +PREHOOK: query: explain vectorization expression +select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 
+join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: 
null + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink 
Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +0 9 9 +2 1 1 +4 1 1 +5 9 9 +8 1 1 +9 1 1 +PREHOOK: query: explain vectorization expression +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + 
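(Aside on the nativeConditionsMet list printed above for VectorReduceSinkLongOperator: it is a flat conjunction of named checks, and the native sink is used only when every one holds; the non-native case lists the failures under nativeConditionsNotMet instead. A minimal sketch of that reporting pattern, with class and method names that are illustrative and not Hive's actual API:

import java.util.ArrayList;
import java.util.List;

public class NativeConditionReport {
    private final List<String> met = new ArrayList<>();
    private final List<String> notMet = new ArrayList<>();

    // Record one named condition, e.g. "No TopN IS true".
    void check(String name, boolean value) {
        (value ? met : notMet).add(name + " IS " + value);
    }

    boolean allMet() { return notMet.isEmpty(); }

    public static void main(String[] args) {
        NativeConditionReport r = new NativeConditionReport();
        r.check("hive.vectorized.execution.reducesink.new.enabled", true);
        r.check("No TopN", true);
        r.check("Uniform Hash", true);
        System.out.println("nativeConditionsMet: " + String.join(", ", r.met));
        System.out.println("native: " + r.allMet());
    }
}

End of aside.)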
condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: explain vectorization expression +select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join tbl2 b + on subq2.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join tbl2 b + on subq2.key = b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + 
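(Aside on the row estimates above: they appear consistent with a default selectivity of roughly 1/3 per uncorrelated range comparison. 10 rows become 3 under key < 6, and stacking (key < 8) and (key < 6) yields 10/9, which truncates to 1, matching the Num rows figures in these plans. A worked sketch of that arithmetic; the helper below is illustrative, not the planner's actual code:

public class RangeSelectivity {
    // Estimated rows after n independent range predicates, each assumed
    // to keep 1/3 of its input (a common planner default, assumed here).
    static long estimate(long inputRows, int numRangePredicates) {
        double sel = Math.pow(1.0 / 3.0, numRangePredicates);
        return Math.max(1, (long) (inputRows * sel));
    }

    public static void main(String[] args) {
        System.out.println(estimate(10, 1)); // key < 6             -> 3
        System.out.println(estimate(10, 2)); // key < 8 AND key < 6 -> 1
    }
}

End of aside.)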
Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join tbl2 b + on subq2.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join tbl2 b + on subq2.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: explain vectorization expression +select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq3 + where key < 6 + ) subq4 + on subq2.key = subq4.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select 
count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq3 + where key < 6 + ) subq4 + on subq2.key = subq4.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join + ( + select * 
from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq3 + where key < 6 + ) subq4 + on subq2.key = subq4.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq3 + where key < 6 + ) subq4 + on subq2.key = subq4.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +#### A masked pattern was here #### +20 +PREHOOK: query: explain vectorization expression +select count(*) from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count(*) from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 8) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 8) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) 
-> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: explain vectorization expression +select count(*) from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count(*) from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2)(children: LongColAddLongScalar(col 0, val 1) -> 2:long) -> boolean + predicate: (key + 1) is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: LongColAddLongScalar(col 0, val 1) -> 2:long + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: 
VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2)(children: LongColAddLongScalar(col 0, val 1) -> 2:long) -> boolean + predicate: (key + 1) is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: LongColAddLongScalar(col 0, val 1) -> 2:long + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + 
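(Aside on the expressions above: LongColAddLongScalar(col 0, val 1) -> 2:long computes key + 1 for a whole batch into scratch column 2, and SelectColumnIsNotNull(col 2) then consumes that scratch column, so the addition is evaluated once per batch in a tight loop. A hand-rolled sketch of the kernel shape, not Hive's actual class:

public class LongColAddLongScalarSketch {
    // Add a scalar to every selected value of an input column,
    // writing results into a scratch output column.
    static void evaluate(long[] input, long scalar, long[] output,
                         int[] selected, int size, boolean selectedInUse) {
        if (selectedInUse) {
            for (int j = 0; j < size; j++) {
                int i = selected[j];
                output[i] = input[i] + scalar;
            }
        } else {
            for (int i = 0; i < size; i++) {
                output[i] = input[i] + scalar;
            }
        }
    }

    public static void main(String[] args) {
        long[] key = {0, 2, 4, 5};
        long[] scratch = new long[4];
        evaluate(key, 1L, scratch, null, 4, false);
        System.out.println(java.util.Arrays.toString(scratch)); // [1, 3, 5, 6]
    }
}

End of aside.)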
Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +22 +PREHOOK: query: explain vectorization expression +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + 
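(Aside on the aggregation pattern throughout these plans: count() runs in two phases. A map-side hash Group By emits partial counts as _col1 (type: bigint), and the reducer's count(VALUE._col0) in mergepartial mode, vectorized as VectorUDAFCountMerge, sums those partials rather than recounting rows. A minimal sketch of the merge step, with hypothetical names rather than Hive's implementation:

import java.util.HashMap;
import java.util.Map;

public class CountMergeSketch {
    // Merge map-side partial counts per key: the final count is the sum
    // of the partials, which is what mergepartial-mode count() computes.
    static Map<Integer, Long> merge(long[][] partials) {
        Map<Integer, Long> result = new HashMap<>();
        for (long[] p : partials) {               // p = {key, partialCount}
            result.merge((int) p[0], p[1], Long::sum);
        }
        return result;
    }

    public static void main(String[] args) {
        long[][] partials = {{0, 4}, {0, 5}, {5, 9}, {8, 1}};
        System.out.println(merge(partials)); // e.g. {0=9, 5=9, 8=1}
    }
}

End of aside.)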
outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: explain vectorization expression +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on (subq1.key = subq2.key) + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 + on (subq1.key = subq3.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on (subq1.key = subq2.key) + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 + on (subq1.key = subq3.key) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column 
stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + Statistics: Num rows: 6 Data size: 613 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 + on (subq1.key = subq3.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 + on (subq1.key = subq3.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: 
default@tbl2 +#### A masked pattern was here #### +56 +PREHOOK: query: explain vectorization expression +select count(*) from ( + select subq2.key as key, subq2.value as value1, b.value as value2 from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 +join tbl2 b +on subq2.key = b.key) a +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count(*) from ( + select subq2.key as key, subq2.value as value1, b.value as value2 from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 +join tbl2 b +on subq2.key = b.key) a +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from ( + select subq2.key as key, subq2.value as value1, b.value as value2 from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 +join tbl2 b +on subq2.key = b.key) a +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from ( + select subq2.key as key, subq2.value as value1, b.value as value2 from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 +join tbl2 b +on subq2.key = b.key) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: CREATE TABLE dest1(key int, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(key int, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: CREATE TABLE dest2(key int, val1 string, val2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest2 +POSTHOOK: query: CREATE TABLE dest2(key int, val1 string, val2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest2 +PREHOOK: query: explain vectorization expression +from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +insert overwrite table dest1 select key, val1 +insert overwrite table dest2 select key, val1, val2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +insert overwrite table dest1 select key, val1 +insert overwrite table dest2 select key, val1, val2 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 
Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +insert overwrite table dest1 select key, val1 +insert overwrite table dest2 select key, val1, val2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +insert overwrite table dest1 select key, val1 +insert overwrite table dest2 select key, val1, val2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.value SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.val2 SIMPLE [(tbl2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@dest1 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +8 val_8 +9 val_9 +PREHOOK: query: select * from dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +2 val_2 val_2 +4 val_4 val_4 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +8 val_8 val_8 +9 val_9 val_9 +PREHOOK: query: DROP TABLE dest2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest2 +PREHOOK: Output: default@dest2 +POSTHOOK: query: DROP TABLE dest2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: default@dest2 +PREHOOK: query: CREATE TABLE dest2(key int, cnt int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest2 +POSTHOOK: query: CREATE TABLE dest2(key int, cnt int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest2 +PREHOOK: query: explain vectorization expression +from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +insert overwrite table dest1 select key, val1 +insert overwrite table dest2 select key, count(*) group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +insert overwrite table dest1 select key, val1 +insert overwrite table dest2 select key, count(*) group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: 
Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +insert overwrite table dest1 select key, val1 +insert overwrite table dest2 select key, count(*) group by 
key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +insert overwrite table dest1 select key, val1 +insert overwrite table dest2 select key, count(*) group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.value SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.cnt EXPRESSION [(tbl1)a.null, (tbl2)b.null, ] +POSTHOOK: Lineage: dest2.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +8 val_8 +9 val_9 +PREHOOK: query: select * from dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +0 9 +2 1 +4 1 +5 9 +8 1 +9 1 diff --git ql/src/test/results/clientpositive/tez/vector_between_columns.q.out ql/src/test/results/clientpositive/tez/vector_between_columns.q.out new file mode 100644 index 0000000..0b0af7d --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_between_columns.q.out @@ -0,0 +1,367 @@ +PREHOOK: query: create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TSINT_txt +POSTHOOK: query: create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TSINT_txt +PREHOOK: query: create table if not exists TINT_txt ( RNUM int , CINT int ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TINT_txt +POSTHOOK: query: create table if not exists TINT_txt ( RNUM int , CINT int ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TINT_txt +PREHOOK: query: load data local inpath '../../data/files/TSINT' into table TSINT_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tsint_txt +POSTHOOK: query: load data local inpath '../../data/files/TSINT' into table TSINT_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tsint_txt +PREHOOK: query: load data local inpath '../../data/files/TINT' into table TINT_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tint_txt +POSTHOOK: query: load data local inpath 
'../../data/files/TINT' into table TINT_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tint_txt +PREHOOK: query: create table TSINT stored as orc AS SELECT * FROM TSINT_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tsint_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TSINT +POSTHOOK: query: create table TSINT stored as orc AS SELECT * FROM TSINT_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tsint_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TSINT +POSTHOOK: Lineage: tsint.csint SIMPLE [(tsint_txt)tsint_txt.FieldSchema(name:csint, type:smallint, comment:null), ] +POSTHOOK: Lineage: tsint.rnum SIMPLE [(tsint_txt)tsint_txt.FieldSchema(name:rnum, type:int, comment:null), ] +tsint_txt.rnum tsint_txt.csint +PREHOOK: query: create table TINT stored as orc AS SELECT * FROM TINT_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tint_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TINT +POSTHOOK: query: create table TINT stored as orc AS SELECT * FROM TINT_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tint_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TINT +POSTHOOK: Lineage: tint.cint SIMPLE [(tint_txt)tint_txt.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: tint.rnum SIMPLE [(tint_txt)tint_txt.FieldSchema(name:rnum, type:int, comment:null), ] +tint_txt.rnum tint_txt.cint +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product +PREHOOK: query: explain vectorization expression +select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tint + Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: rnum (type: int), cint (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Map Join Vectorization: + className: VectorMapJoinInnerMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 25 Data size: 385 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1, 3, 5] + selectExpressions: VectorUDFAdaptor(CASE WHEN (_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END)(children: VectorUDFAdaptor(_col1 BETWEEN _col3 AND _col3) -> 4:boolean) -> 5:string + Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: tsint + Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: rnum (type: int), csint (type: smallint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: smallint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 1' is a cross product +PREHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint +PREHOOK: type: QUERY +PREHOOK: Input: default@tint +PREHOOK: Input: default@tsint +#### A masked pattern was here #### +POSTHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tint +POSTHOOK: Input: default@tsint +#### A masked pattern was here #### +tint.rnum tsint.rnum tint.cint tsint.csint between_col +0 0 NULL NULL NoOk +0 1 NULL -1 NoOk +0 2 NULL 0 NoOk +0 3 NULL 1 NoOk +0 4 NULL 10 NoOk +1 0 -1 NULL NoOk +1 1 -1 -1 Ok +1 2 -1 0 NoOk +1 3 -1 1 NoOk +1 4 -1 10 NoOk +2 0 0 NULL NoOk +2 1 0 -1 NoOk +2 2 0 0 Ok +2 3 0 1 NoOk +2 4 0 10 NoOk +3 0 1 NULL NoOk +3 1 1 -1 NoOk +3 2 1 0 NoOk +3 3 1 1 Ok +3 4 1 10 NoOk +4 0 10 NULL NoOk +4 1 10 -1 NoOk +4 2 10 0 NoOk +4 3 10 1 NoOk +4 4 10 10 Ok +Warning: Map Join MAPJOIN[10][bigTable=?] in task 'Map 1' is a cross product +PREHOOK: query: explain vectorization expression +select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tint + Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: rnum (type: int), cint (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Map Join Vectorization: + className: VectorMapJoinInnerMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 4)(children: VectorUDFAdaptor(_col1 BETWEEN _col3 AND _col3) -> 4:boolean) -> boolean + predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean) + Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint) + 
outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1, 3] + Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: tsint + Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: rnum (type: int), csint (type: smallint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: smallint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[10][bigTable=?] 
in task 'Map 1' is a cross product +PREHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint +PREHOOK: type: QUERY +PREHOOK: Input: default@tint +PREHOOK: Input: default@tsint +#### A masked pattern was here #### +POSTHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tint +POSTHOOK: Input: default@tsint +#### A masked pattern was here #### +tint.rnum tsint.rnum tint.cint tsint.csint +1 1 -1 -1 +2 2 0 0 +3 3 1 1 +4 4 10 10 diff --git ql/src/test/results/clientpositive/tez/vector_between_in.q.out ql/src/test/results/clientpositive/tez/vector_between_in.q.out new file mode 100644 index 0000000..9011e6c --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_between_in.q.out @@ -0,0 +1,1703 @@ +PREHOOK: query: CREATE TABLE decimal_date_test STORED AS ORC AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, CAST(CAST((CAST(cint AS BIGINT) *ctinyint) AS TIMESTAMP) AS DATE) AS cdate FROM alltypesorc ORDER BY cdate +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: database:default +PREHOOK: Output: default@decimal_date_test +POSTHOOK: query: CREATE TABLE decimal_date_test STORED AS ORC AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, CAST(CAST((CAST(cint AS BIGINT) *ctinyint) AS TIMESTAMP) AS DATE) AS cdate FROM alltypesorc ORDER BY cdate +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: database:default +POSTHOOK: Output: default@decimal_date_test +POSTHOOK: Lineage: decimal_date_test.cdate EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] +POSTHOOK: Lineage: decimal_date_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: decimal_date_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: decimal_date_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: decimal_date_test + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: 
true + predicateExpression: FilterLongColumnInList(col 3, values [-67, -171]) -> boolean + predicate: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean) + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cdate (type: date) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: decimal_date_test + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + 
projectedOutputColumns: [0, 1, 2, 3] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsFalse(col 4)(children: LongColumnInList(col 3, values [-67, -171, 20]) -> 4:boolean) -> boolean + predicate: (not (cdate) IN (1969-10-26, 1969-07-14, 1970-01-21)) (type: boolean) + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: decimal_date_test + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> boolean + predicate: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cdecimal1 (type: decimal(20,10)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(20,10)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(20,10)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE 
cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: decimal_date_test + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsFalse(col 4)(children: DecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 4:boolean) -> boolean + predicate: (not (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568)) (type: boolean) + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: decimal_date_test + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnBetween(col 3, left -2, right 1) -> boolean + predicate: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean) + Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cdate (type: date) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1365 Data size: 274112 
Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: decimal_date_test + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnNotBetween(col 3, left -610, right 608) -> boolean + predicate: cdate NOT BETWEEN 1968-05-01 AND 1971-09-01 (type: boolean) + Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cdate (type: date) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + 
className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: decimal_date_test + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnBetween(col 1, left -20, right 45.9918918919) -> boolean + predicate: cdecimal1 BETWEEN -20 AND 45.9918918919 (type: boolean) + Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cdecimal1 (type: decimal(20,10)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] + Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(20,10)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(20,10)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE + 
File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: decimal_date_test + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnNotBetween(col 1, left -2000, right 4390.1351351351) -> boolean + predicate: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean) + Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + 
Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +1969-07-14 +1969-07-14 +1969-07-14 +1969-10-26 +1969-10-26 +1969-10-26 +1969-10-26 +1969-10-26 +1969-10-26 +1969-10-26 +1969-10-26 +1969-10-26 +1969-10-26 +1969-10-26 +1969-10-26 +1969-10-26 +1969-10-26 +PREHOOK: query: SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +6026 +PREHOOK: query: SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +-3367.6517567568 +-3367.6517567568 +-3367.6517567568 +-3367.6517567568 +-3367.6517567568 +-3367.6517567568 +-3367.6517567568 +881.0135135135 +2365.8945945946 +PREHOOK: query: SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +9165 +PREHOOK: query: SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT cdate FROM decimal_date_test WHERE cdate 
BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +1969-12-30 +1969-12-30 +1969-12-30 +1969-12-30 +1969-12-30 +1969-12-30 +1969-12-30 +1969-12-30 +1969-12-30 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1970-01-01 +1970-01-01 +1970-01-01 +1970-01-01 +1970-01-01 +1970-01-01 +1970-01-01 +1970-01-01 +1970-01-01 +1970-01-01 +1970-01-01 +1970-01-02 +1970-01-02 +1970-01-02 +1970-01-02 +1970-01-02 +1970-01-02 +1970-01-02 +1970-01-02 +1970-01-02 +1970-01-02 +1970-01-02 +1970-01-02 +1970-01-02 +1970-01-02 +1970-01-02 +1970-01-02 +1970-01-02 +PREHOOK: query: SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +1968-04-06 +1968-04-08 +1968-04-09 +1968-04-13 +1968-04-15 +1968-04-15 +1968-04-18 +1968-04-22 +1968-04-24 +1968-04-25 +1968-04-26 +1968-04-26 +1968-04-26 +1968-04-28 +1968-04-28 +1968-04-28 +1968-04-28 +1968-04-29 +1968-04-30 +1971-09-02 +1971-09-04 +1971-09-06 +1971-09-06 +1971-09-06 +1971-09-09 +1971-09-09 +1971-09-15 +1971-09-17 +1971-09-18 +1971-09-21 +1971-09-21 +1971-09-21 +1971-09-22 +1971-09-22 +1971-09-25 +PREHOOK: query: SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +-18.5162162162 +-17.3216216216 +-16.7243243243 +-16.1270270270 +-15.5297297297 +-10.7513513514 +-9.5567567568 +-8.3621621622 +-5.9729729730 +-3.5837837838 +4.1810810811 +4.7783783784 +4.7783783784 +5.3756756757 +5.9729729730 +5.9729729730 +11.3486486486 +11.3486486486 +11.9459459459 +14.9324324324 +19.1135135135 +20.3081081081 +22.1000000000 +24.4891891892 +33.4486486486 +34.6432432432 +40.0189189189 +42.4081081081 +43.0054054054 +44.2000000000 +44.2000000000 +44.7972972973 +45.9918918919 +PREHOOK: query: SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +6172 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION 
SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: decimal_date_test + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Select Operator + expressions: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + selectExpressions: LongColumnInList(col 3, values [-67, -171]) -> 4:boolean + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 4 + native: false + projectedOutputColumns: [0] + keys: _col0 (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: boolean) + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] + keys: KEY._col0 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: 
Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: boolean)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
+ Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
+ Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: decimal_date_test
+ Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
+ Select Operator
+ expressions: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [4]
+ selectExpressions: DecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 4:boolean
+ Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ Group By Vectorization:
+ aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 4
+ native: false
+ projectedOutputColumns: [0]
+ keys: _col0 (type: boolean)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: boolean)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: boolean)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 1) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: [0]
+ keys: KEY._col0 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: boolean)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
+ Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
+ Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
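The plan above is the interesting part of this hunk: the decimal IN predicate compiles to the native DecimalColumnInList expression rather than a row-mode UDF. As a rough sketch of that batch-at-a-time pattern (illustrative Java only, not the actual Hive class; null and selected-vector handling elided):

    // Hypothetical, simplified "column IN (literals)" over one vectorized batch.
    // inSet stands in for the literal set held by the generated expression.
    static void evalInList(long[] col, long[] out, int n, java.util.Set<Long> inSet) {
      for (int i = 0; i < n; i++) {
        out[i] = inSet.contains(col[i]) ? 1 : 0; // booleans are 0/1 longs in a batch
      }
    }

The point of the pattern is a tight loop over primitive arrays with no per-row object allocation, which is what the "native: true" annotations in the plan assert.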
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: decimal_date_test
+ Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
+ Select Operator
+ expressions: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [4]
+ selectExpressions: VectorUDFAdaptor(cdate BETWEEN 1969-12-30 AND 1970-01-02) -> 4:boolean
+ Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ Group By Vectorization:
+ aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 4
+ native: false
+ projectedOutputColumns: [0]
+ keys: _col0 (type: boolean)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: boolean)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: boolean)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 1) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: [0]
+ keys: KEY._col0 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: boolean)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
+ Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
+ Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
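Note the contrast with the previous plan: this BETWEEN predicate has no native vectorized expression in this build, so it is wrapped in VectorUDFAdaptor and the map work reports usesVectorUDFAdaptor: true. A minimal sketch of the adaptor idea (illustrative only; the real class converts values through ObjectInspectors and handles nulls and repeating vectors):

    // Hypothetical row-at-a-time fallback inside a vectorized pipeline.
    // rowAt() is an assumed helper that materializes one row from the batch.
    for (int i = 0; i < batch.size; i++) {
      Object result = udf.evaluate(rowAt(batch, i)); // row-mode UDF call per row
      outVector[i] = Boolean.TRUE.equals(result) ? 1 : 0;
    }

The adaptor keeps the batch flowing but gives up the tight primitive loop, which is why the plans distinguish allNative from merely vectorized.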
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: decimal_date_test
+ Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
+ Select Operator
+ expressions: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [4]
+ selectExpressions: VectorUDFAdaptor(cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351) -> 4:boolean
+ Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ Group By Vectorization:
+ aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 4
+ native: false
+ projectedOutputColumns: [0]
+ keys: _col0 (type: boolean)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: boolean)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: boolean)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 1) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: [0]
+ keys: KEY._col0 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: boolean)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
+ Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
+ Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_date_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_date_test
+#### A masked pattern was here ####
+NULL 6230
+false 6041
+true 17
+PREHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_date_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_date_test
+#### A masked pattern was here ####
+NULL 3114
+false 9165
+true 9
+PREHOOK: query: SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_date_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_date_test
+#### A masked pattern was here ####
+NULL 6230
+false 5974
+true 84
+PREHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_date_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_date_test
+#### A masked pattern was here ####
+NULL 3114
+false 3002
+true 6172
+PREHOOK: query: SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_date_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_date_test
+#### A masked pattern was here ####
+NULL 6230
+false 6041
+true 17
+PREHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_date_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_date_test
+#### A masked pattern was here ####
+NULL 3114
+false 9165
+true 9
+PREHOOK: query: SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_date_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_date_test
+#### A masked pattern was here ####
+NULL 6230
+false 5974
+true 84
+PREHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_date_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_date_test
+#### A masked pattern was here ####
+NULL 3114
+false 3002
+true 6172
diff --git ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out
new file mode 100644
index 0000000..dc93d1a
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out
@@ -0,0 +1,587 @@
+PREHOOK: query: DROP TABLE over1k
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE over1k
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE hundredorc
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE hundredorc
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE over1k(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@over1k
+POSTHOOK: query: CREATE TABLE over1k(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over1k
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@over1k
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@over1k
+PREHOOK: query: CREATE TABLE hundredorc(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@hundredorc
+POSTHOOK: query: CREATE TABLE hundredorc(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hundredorc
+PREHOOK: query: INSERT INTO TABLE hundredorc SELECT * FROM over1k LIMIT 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over1k
+PREHOOK: Output: default@hundredorc
+POSTHOOK: query: INSERT INTO TABLE hundredorc SELECT * FROM over1k LIMIT 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over1k
+POSTHOOK: Output: default@hundredorc
+POSTHOOK: Lineage: hundredorc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: hundredorc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: hundredorc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: hundredorc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: hundredorc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: hundredorc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: hundredorc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: hundredorc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: hundredorc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: hundredorc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: hundredorc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ]
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT sum(hash(*))
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT sum(hash(*))
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 10) -> boolean
+ predicate: bin is not null (type: boolean)
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col10 (type: binary)
+ 1 _col10 (type: binary)
+ Map Join Vectorization:
+ className: VectorMapJoinInnerStringOperator
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Select Operator
+ expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21) (type: int)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [21]
+ selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21)) -> 21:int
+ Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 21) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ native: false
+ projectedOutputColumns: [0]
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 10) -> boolean
+ predicate: bin is not null (type: boolean)
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col10 (type: binary)
+ sort order: +
+ Map-reduce partition columns: _col10 (type: binary)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2))
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 0) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ native: false
+ projectedOutputColumns: [0]
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT sum(hash(*))
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hundredorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(*))
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hundredorc
+#### A masked pattern was here ####
+-27832781952
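For reference, hash(_col0,...,_col21) in the query above is an ordinary Hive UDF over all joined columns, which is why it runs through VectorUDFAdaptor in the plan. The combined value whose sum is -27832781952 is built the usual Java way, one field hash folded in at a time (a sketch under the assumption of the standard 31-multiplier fold used by ObjectInspector-style hashing; hashOf() is a stand-in for the per-type hash):

    int h = 0;
    for (Object field : row) {
      h = 31 * h + (field == null ? 0 : hashOf(field)); // fold each column's hash
    }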
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT count(*), bin
+FROM hundredorc
+GROUP BY bin
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT count(*), bin
+FROM hundredorc
+GROUP BY bin
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: hundredorc
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ Select Operator
+ expressions: bin (type: binary)
+ outputColumnNames: bin
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [10]
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 10
+ native: false
+ projectedOutputColumns: [0]
+ keys: bin (type: binary)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: binary)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: binary)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 1) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: [0]
+ keys: KEY._col0 (type: binary)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: bigint), _col0 (type: binary)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 0]
+ Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT count(*), bin
+FROM hundredorc
+GROUP BY bin
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hundredorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT count(*), bin
+FROM hundredorc
+GROUP BY bin
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hundredorc
+#### A masked pattern was here ####
+5 american history
+5 biology
+2 chemistry
+2 debate
+4 education
+5 forestry
+4 geology
+5 history
+6 industrial engineering
+3 joggying
+5 kindergarten
+1 linguistics
+9 mathematics
+8 nap time
+1 opthamology
+2 philosophy
+5 quiet hour
+4 religion
+3 study skills
+7 topology
+1 undecided
+2 values clariffication
+3 wind surfing
+3 xylophone band
+2 yard duty
+3 zync studies
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT t1.i, t1.bin, t2.bin
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT t1.i, t1.bin, t2.bin
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 2) -> boolean
+ predicate: i is not null (type: boolean)
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i (type: int), bin (type: binary)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 10]
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Map Join Vectorization:
+ className: VectorMapJoinInnerLongOperator
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 10, 11]
+ Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 2) -> boolean
+ predicate: i is not null (type: boolean)
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i (type: int), bin (type: binary)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 10]
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: binary)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git ql/src/test/results/clientpositive/tez/vector_bround.q.out ql/src/test/results/clientpositive/tez/vector_bround.q.out
new file mode 100644
index 0000000..9458d00
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/vector_bround.q.out
@@ -0,0 +1,67 @@
+PREHOOK: query: create table test_vector_bround(v0 double, v1 double) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_vector_bround
+POSTHOOK: query: create table test_vector_bround(v0 double, v1 double) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_vector_bround
+PREHOOK: query: insert into table test_vector_bround
+values
+(2.5, 1.25),
+(3.5, 1.35),
+(-2.5, -1.25),
+(-3.5, -1.35),
+(2.49, 1.249),
+(3.49, 1.349),
+(2.51, 1.251),
+(3.51, 1.351)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test_vector_bround
+POSTHOOK: query: insert into table test_vector_bround
+values
+(2.5, 1.25),
+(3.5, 1.35),
+(-2.5, -1.25),
+(-3.5, -1.35),
+(2.49, 1.249),
+(3.49, 1.349),
+(2.51, 1.251),
+(3.51, 1.351)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@test_vector_bround
+POSTHOOK: Lineage: test_vector_bround.v0 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test_vector_bround.v1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain vectorization select bround(v0), bround(v1, 1) from test_vector_bround
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization select bround(v0), bround(v1, 1) from test_vector_bround
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Map 1 vectorized
+ File Output Operator [FS_4]
+ Select Operator [SEL_3] (rows=8 width=16)
+ Output:["_col0","_col1"]
+ TableScan [TS_0] (rows=8 width=16)
+ default@test_vector_bround,test_vector_bround,Tbl:COMPLETE,Col:NONE,Output:["v0","v1"]
+
+PREHOOK: query: select bround(v0), bround(v1, 1) from test_vector_bround
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_vector_bround
+#### A masked pattern was here ####
+POSTHOOK: query: select bround(v0), bround(v1, 1) from test_vector_bround
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_vector_bround
+#### A masked pattern was here ####
+2.0 1.2
+4.0 1.4
+-2.0 -1.2
+-4.0 -1.4
+2.0 1.2
+3.0 1.3
+3.0 1.3
+4.0 1.4
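The bround() results above are banker's rounding (round half to even): 2.5 and -2.5 go to 2.0 and -2.0 while 3.5 goes to 4.0, which removes the upward bias of plain half-up rounding. The same behavior can be reproduced with java.math, which is a reasonable mental model for the UDF even though the vectorized code works on primitive doubles:

    import java.math.BigDecimal;
    import java.math.RoundingMode;

    BigDecimal a = new BigDecimal("2.5").setScale(0, RoundingMode.HALF_EVEN);  // 2
    BigDecimal b = new BigDecimal("3.5").setScale(0, RoundingMode.HALF_EVEN);  // 4
    BigDecimal c = new BigDecimal("1.25").setScale(1, RoundingMode.HALF_EVEN); // 1.2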
diff --git ql/src/test/results/clientpositive/tez/vector_bucket.q.out ql/src/test/results/clientpositive/tez/vector_bucket.q.out
new file mode 100644
index 0000000..8632613
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/vector_bucket.q.out
@@ -0,0 +1,125 @@
+PREHOOK: query: CREATE TABLE non_orc_table(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS sequencefile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@non_orc_table
+POSTHOOK: query: CREATE TABLE non_orc_table(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS sequencefile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@non_orc_table
+PREHOOK: query: explain vectorization expression
+insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization expression
+insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three')
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: values__tmp__table__1
+ Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: tmp_values_col1 (type: string), tmp_values_col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Map Vectorization:
+ enabled: false
+ enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 1]
+ selectExpressions: VectorUDFAdaptor(UDFToInteger(VALUE._col0)) -> 2:int
+ Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.non_orc_table
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.non_orc_table
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+PREHOOK: query: select a, b from non_orc_table order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@non_orc_table
+#### A masked pattern was here ####
+POSTHOOK: query: select a, b from non_orc_table order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@non_orc_table
+#### A masked pattern was here ####
+PREHOOK: query: insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@non_orc_table
+POSTHOOK: query: insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@non_orc_table
+POSTHOOK: Lineage: non_orc_table.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: non_orc_table.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: select a, b from non_orc_table order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@non_orc_table
+#### A masked pattern was here ####
+POSTHOOK: query: select a, b from non_orc_table order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@non_orc_table
+#### A masked pattern was here ####
+1 one
+1 one
+2 two
+3 three
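In the vector_bucket test above, the CUSTOM_SIMPLE_EDGE shuffle partitions rows by UDFToInteger(_col0) so that each row lands in one of the table's two buckets. The routing rule is, in essence, a non-negative hash-mod (an illustrative sketch of the convention, not a quote of the exact Hive utility):

    // Illustrative bucket routing for CLUSTERED BY(a) INTO 2 BUCKETS.
    int bucket = (Integer.hashCode(a) & Integer.MAX_VALUE) % 2;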
diff --git ql/src/test/results/clientpositive/tez/vector_cast_constant.q.out ql/src/test/results/clientpositive/tez/vector_cast_constant.q.out
new file mode 100644
index 0000000..ba43096
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/vector_cast_constant.q.out
@@ -0,0 +1,266 @@
+PREHOOK: query: DROP TABLE over1k
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE over1k
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE over1korc
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE over1korc
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE over1k(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@over1k
+POSTHOOK: query: CREATE TABLE over1k(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over1k
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@over1k
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@over1k
+PREHOOK: query: CREATE TABLE over1korc(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@over1korc
+POSTHOOK: query: CREATE TABLE over1korc(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over1korc
+PREHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over1k
+PREHOOK: Output: default@over1korc
+POSTHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over1k
+POSTHOOK: Output: default@over1korc
+POSTHOOK: Lineage: over1korc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: over1korc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: over1korc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: over1korc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: over1korc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: over1korc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: over1korc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ]
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
+ i,
+ AVG(CAST(50 AS INT)) AS `avg_int_ok`,
+ AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
+ AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
+ FROM over1korc GROUP BY i ORDER BY i LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
+ i,
+ AVG(CAST(50 AS INT)) AS `avg_int_ok`,
+ AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
+ AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
+ FROM over1korc GROUP BY i ORDER BY i LIMIT 10
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: over1korc
+ Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ Select Operator
+ expressions: i (type: int)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2]
+ Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(50), avg(50.0), avg(50)
+ Group By Vectorization:
+ aggregators: VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct, VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct
+ className: VectorGroupByOperator
+ vectorOutput: false
+ keyExpressions: col 2
+ native: false
+ projectedOutputColumns: [0, 1, 2]
+ vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct output type STRUCT requires PRIMITIVE IS false
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: false
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reducer 2
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
+ vectorized: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2)
+ Group By Vectorization:
+ vectorOutput: false
+ native: false
+ projectedOutputColumns: null
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4))
+ Reducer 3
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
+ Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ i,
+ AVG(CAST(50 AS INT)) AS `avg_int_ok`,
+ AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
+ AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
+ FROM over1korc GROUP BY i ORDER BY i LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over1korc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ i,
+ AVG(CAST(50 AS INT)) AS `avg_int_ok`,
+ AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
+ AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
+ FROM over1korc GROUP BY i ORDER BY i LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over1korc
+#### A masked pattern was here ####
+65536 50.0 50.0 50.0000
+65537 50.0 50.0 50.0000
+65538 50.0 50.0 50.0000
+65539 50.0 50.0 50.0000
+65540 50.0 50.0 50.0000
+65541 50.0 50.0 50.0000
+65542 50.0 50.0 50.0000
+65543 50.0 50.0 50.0000
+65544 50.0 50.0 50.0000
+65545 50.0 50.0 50.0000
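The AVG plan above shows why this query only half-vectorizes: the map side emits its partial aggregate as a struct (a count plus a running sum), and the reducer cannot consume STRUCT-typed values in vectorized mode, hence vectorOutput: false and the notVectorizedReason on Reducer 2. The two-phase shape itself is simple (an illustrative sketch; method and field names assumed):

    long count = 0; double sum = 0;                             // map-side partial
    void iterate(double v)         { count++; sum += v; }
    void merge(long c2, double s2) { count += c2; sum += s2; }  // reducer merge
    double terminate()             { return sum / count; }      // final average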
diff --git ql/src/test/results/clientpositive/tez/vector_char_2.q.out ql/src/test/results/clientpositive/tez/vector_char_2.q.out
new file mode 100644
index 0000000..0fbaaca
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/vector_char_2.q.out
@@ -0,0 +1,430 @@
+PREHOOK: query: drop table char_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table char_2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table char_2 (
+ key char(10),
+ value char(20)
+) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@char_2
+POSTHOOK: query: create table char_2 (
+ key char(10),
+ value char(20)
+) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@char_2
+PREHOOK: query: insert overwrite table char_2 select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@char_2
+POSTHOOK: query: insert overwrite table char_2 select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@char_2
+POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value asc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value asc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+val_0 0 3
+val_10 10 1
+val_100 200 2
+val_103 206 2
+val_104 208 2
+PREHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: char_2
+ Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
+ Select Operator
+ expressions: value (type: char(20)), UDFToInteger(key) (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 2]
+ selectExpressions: VectorUDFAdaptor(UDFToInteger(key)) -> 2:int
+ Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1), count()
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 1
+ native: false
+ projectedOutputColumns: [0, 1]
+ keys: _col0 (type: char(20))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: char(20))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: char(20))
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: No TopN IS false
+ Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 1) -> bigint, VectorUDAFCountMerge(col 2) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: [0, 1]
+ keys: KEY._col0 (type: char(20))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: char(20))
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
+ Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2]
+ Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 5
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 5
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_2
+#### A masked pattern was here ####
+POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_2
+#### A masked pattern was here ####
+val_0 0 3
+val_10 10 1
+val_100 200 2
+val_103 206 2
+val_104 208 2
value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +val_98 196 2 +val_97 194 2 +val_96 96 1 +val_95 190 2 +val_92 92 1 +PREHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: char_2 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: value (type: char(20)), UDFToInteger(key) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2] + selectExpressions: VectorUDFAdaptor(UDFToInteger(key)) -> 2:int + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1), count() + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1 + native: false + projectedOutputColumns: [0, 1] + keys: _col0 (type: char(20)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(20)) + sort order: - + Map-reduce partition columns: _col0 (type: char(20)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN 
[tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1) -> bigint, VectorUDAFCountMerge(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0, 1] + keys: KEY._col0 (type: char(20)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(20)) + sort order: - + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +val_98 196 2 +val_97 194 2 +val_96 96 1 +val_95 190 2 +val_92 92 1 +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_2 +PREHOOK: Output: default@char_2 +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_2 +POSTHOOK: Output: default@char_2 diff --git ql/src/test/results/clientpositive/tez/vector_char_4.q.out 
ql/src/test/results/clientpositive/tez/vector_char_4.q.out new file mode 100644 index 0000000..07b02a0 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_char_4.q.out @@ -0,0 +1,198 @@ +PREHOOK: query: drop table if exists vectortab2k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists vectortab2k +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists vectortab2korc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists vectortab2korc +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: create table vectortab2korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2korc +POSTHOOK: query: create table vectortab2korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2korc +PREHOOK: query: INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: default@vectortab2korc +POSTHOOK: query: INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: default@vectortab2korc +POSTHOOK: Lineage: vectortab2korc.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.bo SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.d SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.dc SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:dc, type:decimal(38,18), comment:null), ] +POSTHOOK: Lineage: vectortab2korc.dt SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:dt, type:date, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.f SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.i SIMPLE 
[(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.s SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.s2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:s2, type:string, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] +PREHOOK: query: drop table if exists char_lazy_binary_columnar +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_lazy_binary_columnar +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_lazy_binary_columnar(ct char(10), csi char(10), ci char(20), cb char(30), cf char(20), cd char(20), cs char(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_lazy_binary_columnar +POSTHOOK: query: create table char_lazy_binary_columnar(ct char(10), csi char(10), ci char(20), cb char(30), cf char(20), cd char(20), cs char(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_lazy_binary_columnar +PREHOOK: query: explain vectorization expression +insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2korc + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + Select Operator + expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] + selectExpressions: CastLongToChar(col 0, maxLength 10) -> 13:Char, CastLongToChar(col 1, maxLength 10) -> 14:Char, CastLongToChar(col 2, maxLength 20) -> 15:Char, CastLongToChar(col 3, maxLength 30) -> 16:Char, VectorUDFAdaptor(CAST( f AS CHAR(20)) -> 17:char(20), VectorUDFAdaptor(CAST( d AS CHAR(20)) -> 18:char(20), CastStringGroupToChar(col 8, maxLength 50) -> 19:Char + 
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe + name: default.char_lazy_binary_columnar + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe + name: default.char_lazy_binary_columnar + + Stage: Stage-3 + Stats-Aggr Operator + diff --git ql/src/test/results/clientpositive/tez/vector_char_cast.q.out ql/src/test/results/clientpositive/tez/vector_char_cast.q.out new file mode 100644 index 0000000..c836dd8 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_char_cast.q.out @@ -0,0 +1,33 @@ +PREHOOK: query: create table s1(id smallint) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@s1 +POSTHOOK: query: create table s1(id smallint) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@s1 +PREHOOK: query: insert into table s1 values (1000),(1001),(1002),(1003),(1000) +PREHOOK: type: QUERY +PREHOOK: Output: default@s1 +POSTHOOK: query: insert into table s1 values (1000),(1001),(1002),(1003),(1000) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@s1 +POSTHOOK: Lineage: s1.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: select count(1) from s1 where cast(id as char(4))='1000' +PREHOOK: type: QUERY +PREHOOK: Input: default@s1 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from s1 where cast(id as char(4))='1000' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s1 +#### A masked pattern was here #### +2 +PREHOOK: query: select count(1) from s1 where cast(id as char(4))='1000' +PREHOOK: type: QUERY +PREHOOK: Input: default@s1 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from s1 where cast(id as char(4))='1000' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s1 +#### A masked pattern was here #### +2 diff --git ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out new file mode 100644 index 0000000..f38bf30 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out @@ -0,0 +1,681 @@ +PREHOOK: query: drop table if exists char_join1_vc1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_join1_vc1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists char_join1_vc2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_join1_vc2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists 
char_join1_str +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_join1_str +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists char_join1_vc1_orc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_join1_vc1_orc +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists char_join1_vc2_orc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_join1_vc2_orc +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists char_join1_str_orc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_join1_str_orc +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_join1_vc1 ( + c1 int, + c2 char(10) +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_join1_vc1 +POSTHOOK: query: create table char_join1_vc1 ( + c1 int, + c2 char(10) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_join1_vc1 +PREHOOK: query: create table char_join1_vc2 ( + c1 int, + c2 char(20) +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_join1_vc2 +POSTHOOK: query: create table char_join1_vc2 ( + c1 int, + c2 char(20) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_join1_vc2 +PREHOOK: query: create table char_join1_str ( + c1 int, + c2 string +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_join1_str +POSTHOOK: query: create table char_join1_str ( + c1 int, + c2 string +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_join1_str +PREHOOK: query: load data local inpath '../../data/files/vc1.txt' into table char_join1_vc1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@char_join1_vc1 +POSTHOOK: query: load data local inpath '../../data/files/vc1.txt' into table char_join1_vc1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@char_join1_vc1 +PREHOOK: query: load data local inpath '../../data/files/vc1.txt' into table char_join1_vc2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@char_join1_vc2 +POSTHOOK: query: load data local inpath '../../data/files/vc1.txt' into table char_join1_vc2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@char_join1_vc2 +PREHOOK: query: load data local inpath '../../data/files/vc1.txt' into table char_join1_str +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@char_join1_str +POSTHOOK: query: load data local inpath '../../data/files/vc1.txt' into table char_join1_str +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@char_join1_str +PREHOOK: query: create table char_join1_vc1_orc stored as orc as select * from char_join1_vc1 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@char_join1_vc1 +PREHOOK: Output: database:default +PREHOOK: Output: default@char_join1_vc1_orc +POSTHOOK: query: create table char_join1_vc1_orc stored as orc as select * from char_join1_vc1 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@char_join1_vc1 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_join1_vc1_orc +POSTHOOK: Lineage: char_join1_vc1_orc.c1 SIMPLE [(char_join1_vc1)char_join1_vc1.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: char_join1_vc1_orc.c2 SIMPLE 
[(char_join1_vc1)char_join1_vc1.FieldSchema(name:c2, type:char(10), comment:null), ] +PREHOOK: query: create table char_join1_vc2_orc stored as orc as select * from char_join1_vc2 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@char_join1_vc2 +PREHOOK: Output: database:default +PREHOOK: Output: default@char_join1_vc2_orc +POSTHOOK: query: create table char_join1_vc2_orc stored as orc as select * from char_join1_vc2 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@char_join1_vc2 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_join1_vc2_orc +POSTHOOK: Lineage: char_join1_vc2_orc.c1 SIMPLE [(char_join1_vc2)char_join1_vc2.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: char_join1_vc2_orc.c2 SIMPLE [(char_join1_vc2)char_join1_vc2.FieldSchema(name:c2, type:char(20), comment:null), ] +PREHOOK: query: create table char_join1_str_orc stored as orc as select * from char_join1_str +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@char_join1_str +PREHOOK: Output: database:default +PREHOOK: Output: default@char_join1_str_orc +POSTHOOK: query: create table char_join1_str_orc stored as orc as select * from char_join1_str +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@char_join1_str +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_join1_str_orc +POSTHOOK: Lineage: char_join1_str_orc.c1 SIMPLE [(char_join1_str)char_join1_str.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: char_join1_str_orc.c2 SIMPLE [(char_join1_str)char_join1_str.FieldSchema(name:c2, type:string, comment:null), ] +PREHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean + predicate: c2 is not null (type: boolean) + Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int), c2 (type: char(10)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: char(10)) + 1 _col1 (type: char(10)) + Map Join Vectorization: + className: VectorMapJoinInnerStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No 
nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: char(10)) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean + predicate: c2 is not null (type: boolean) + Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int), c2 (type: char(10)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: char(10)) + sort order: + + Map-reduce partition columns: _col1 (type: char(10)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: char(10)) + outputColumnNames: _col0, _col1, _col2, _col3 
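The char(10)-to-char(10) map join recorded in this plan can be exercised in isolation with a short HiveQL sketch. The query is verbatim from this file; hive.mapjoin.optimized.hashtable and hive.vectorized.execution.mapjoin.native.enabled are named in the plan's nativeConditionsMet list, while hive.auto.convert.join is an assumption about how the broadcast Map Join gets selected in the first place.

    -- hypothetical repro for the vectorized char(10) map join above;
    -- char_join1_vc1_orc is the ORC table created earlier in this file
    set hive.auto.convert.join=true;                            -- assumed
    set hive.mapjoin.optimized.hashtable=true;                  -- named in the plan
    set hive.vectorized.execution.mapjoin.native.enabled=true;  -- named in the plan

    explain vectorization expression
    select * from char_join1_vc1_orc a
    join char_join1_vc1_orc b on (a.c2 = b.c2)
    order by a.c1;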
+ Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] + Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_join1_vc1_orc +#### A masked pattern was here #### +POSTHOOK: query: select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_join1_vc1_orc +#### A masked pattern was here #### +1 abc 1 abc +1 abc 2 abc +2 abc 1 abc +2 abc 2 abc +3 abc 3 abc +PREHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean + predicate: c2 is not null (type: boolean) + Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int), c2 (type: char(10)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: char(20)) + sort order: + + Map-reduce partition columns: _col1 (type: char(20)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean + predicate: c2 is not null (type: boolean) + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int), c2 (type: char(20)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: char(20)) + 1 _col1 (type: char(20)) + Map Join Vectorization: + className: VectorMapJoinInnerStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: char(20)) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: char(20)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] + Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_join1_vc1_orc +PREHOOK: Input: default@char_join1_vc2_orc +#### A masked pattern was here #### +POSTHOOK: query: select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_join1_vc1_orc +POSTHOOK: Input: default@char_join1_vc2_orc +#### A masked pattern was here #### +1 abc 1 abc +1 abc 2 abc +2 abc 1 abc +2 abc 2 abc +3 abc 3 abc +PREHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean + predicate: c2 is not null (type: boolean) + Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int), c2 (type: char(10)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToString(_col1) (type: string) + 1 _col1 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: CastStringGroupToString(col 1) -> 2:String + className: VectorMapJoinInnerStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: string) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean + predicate: c2 is not null (type: boolean) + Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int), c2 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] + Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from 
char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_join1_str_orc +PREHOOK: Input: default@char_join1_vc1_orc +#### A masked pattern was here #### +POSTHOOK: query: select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_join1_str_orc +POSTHOOK: Input: default@char_join1_vc1_orc +#### A masked pattern was here #### +1 abc 1 abc +2 abc 1 abc +3 abc 3 abc +PREHOOK: query: drop table char_join1_vc1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_join1_vc1 +PREHOOK: Output: default@char_join1_vc1 +POSTHOOK: query: drop table char_join1_vc1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_join1_vc1 +POSTHOOK: Output: default@char_join1_vc1 +PREHOOK: query: drop table char_join1_vc2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_join1_vc2 +PREHOOK: Output: default@char_join1_vc2 +POSTHOOK: query: drop table char_join1_vc2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_join1_vc2 +POSTHOOK: Output: default@char_join1_vc2 +PREHOOK: query: drop table char_join1_str +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_join1_str +PREHOOK: Output: default@char_join1_str +POSTHOOK: query: drop table char_join1_str +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_join1_str +POSTHOOK: Output: default@char_join1_str +PREHOOK: query: drop table char_join1_vc1_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_join1_vc1_orc +PREHOOK: Output: default@char_join1_vc1_orc +POSTHOOK: query: drop table char_join1_vc1_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_join1_vc1_orc +POSTHOOK: Output: default@char_join1_vc1_orc +PREHOOK: query: drop table char_join1_vc2_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_join1_vc2_orc +PREHOOK: Output: default@char_join1_vc2_orc +POSTHOOK: query: drop table char_join1_vc2_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_join1_vc2_orc +POSTHOOK: Output: default@char_join1_vc2_orc +PREHOOK: query: drop table char_join1_str_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_join1_str_orc +PREHOOK: Output: default@char_join1_str_orc +POSTHOOK: query: drop table char_join1_str_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_join1_str_orc +POSTHOOK: Output: default@char_join1_str_orc diff --git ql/src/test/results/clientpositive/tez/vector_char_simple.q.out ql/src/test/results/clientpositive/tez/vector_char_simple.q.out new file mode 100644 index 0000000..e2b7814 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_char_simple.q.out @@ -0,0 +1,311 @@ +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_2 ( + key char(10), + value char(20) +) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_2 +POSTHOOK: query: create table char_2 ( + key char(10), + value char(20) +) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_2 +PREHOOK: query: insert overwrite table char_2 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@char_2 +POSTHOOK: query: insert overwrite table char_2 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@char_2 +POSTHOOK: Lineage: char_2.key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key, value +from src +order by key asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from src +order by key asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: explain vectorization only select key, value +from char_2 +order by key asc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only select key, value +from char_2 +order by key asc +limit 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Vertices: + Map 1 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + +PREHOOK: query: select key, value +from char_2 +order by key asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from char_2 +order by key asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: select key, value +from src +order by key desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from src +order by key desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +98 val_98 +98 val_98 +97 val_97 +97 val_97 +96 val_96 +PREHOOK: query: explain vectorization only select key, value +from char_2 +order by key desc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only select key, value +from char_2 +order by key desc +limit 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Vertices: + Map 1 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + +PREHOOK: query: select key, value +from char_2 +order by key desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from char_2 +order by key desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +98 val_98 +98 val_98 +97 val_97 +97 val_97 +96 val_96 +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_2 +PREHOOK: Output: default@char_2 +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_2 +POSTHOOK: Output: default@char_2 +PREHOOK: query: create table char_3 ( + field char(12) +) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_3 +POSTHOOK: query: create table char_3 ( + field char(12) +) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_3 +PREHOOK: query: explain vectorization only operator +insert into table char_3 select cint from alltypesorc limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only operator +insert into table char_3 select cint from alltypesorc limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Limit Vectorization: + className: VectorLimitOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + Limit Vectorization: + className: VectorLimitOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: CastLongToChar(col 0, maxLength 12) -> 1:Char + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + + Stage: Stage-2 + + Stage: Stage-0 + + Stage: Stage-3 + +PREHOOK: query: insert into table char_3 select cint from alltypesorc 
limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@char_3 +POSTHOOK: query: insert into table char_3 select cint from alltypesorc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@char_3 +POSTHOOK: Lineage: char_3.field EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: drop table char_3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_3 +PREHOOK: Output: default@char_3 +POSTHOOK: query: drop table char_3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_3 +POSTHOOK: Output: default@char_3 diff --git ql/src/test/results/clientpositive/tez/vector_coalesce.q.out ql/src/test/results/clientpositive/tez/vector_coalesce.q.out new file mode 100644 index 0000000..f0d5ef4 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_coalesce.q.out @@ -0,0 +1,601 @@ +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c +FROM alltypesorc +WHERE (cdouble IS NULL) +ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c +LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c +FROM alltypesorc +WHERE (cdouble IS NULL) +ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c +LIMIT 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNull(col 5) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [6, 2, 4, 1, 16] + selectExpressions: VectorCoalesce(columns [12, 6, 13, 14, 15])(children: ConstantVectorExpression(val null) -> 12:string, col 6, CastLongToString(col 2) -> 13:String, VectorUDFAdaptor(null(cfloat)) -> 14:string, CastLongToString(col 1) -> 15:String) -> 16:string + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + 
className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 0, 1, 2, 3, 4] + selectExpressions: ConstantVectorExpression(val null) -> 5:double + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + + Stage: Stage-0 + Fetch Operator + +PREHOOK: query: SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c +FROM alltypesorc +WHERE (cdouble IS NULL) +ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c +LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c +FROM alltypesorc +WHERE (cdouble IS NULL) +ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c +LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +NULL NULL -413196097 -51.0 NULL -413196097 +NULL NULL -413553449 11.0 NULL -413553449 +NULL NULL -457224565 11.0 NULL -457224565 +NULL NULL -591488718 -51.0 NULL -591488718 +NULL NULL -656987896 8.0 NULL -656987896 +NULL NULL -670908417 8.0 NULL -670908417 +NULL NULL -738306196 -51.0 NULL -738306196 +NULL NULL -819152895 8.0 NULL -819152895 +NULL NULL -827212561 8.0 NULL -827212561 +NULL NULL -949587513 11.0 NULL -949587513 +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c +FROM alltypesorc +WHERE (ctinyint IS NULL) +ORDER BY ctinyint, cdouble, cint, c +LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c +FROM alltypesorc +WHERE (ctinyint IS NULL) +ORDER BY ctinyint, cdouble, cint, c +LIMIT 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNull(col 0) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 2, 15] + selectExpressions: VectorCoalesce(columns [12, 14, 13])(children: ConstantVectorExpression(val null) -> 12:double, DoubleColAddDoubleColumn(col 5, col 13)(children: FuncLog2LongToDouble(col 2) -> 13:double) -> 14:double, ConstantVectorExpression(val 0.0) -> 13:double) -> 15:double + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 0, 1, 2] + selectExpressions: ConstantVectorExpression(val null) -> 3:tinyint + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + + Stage: Stage-0 + Fetch Operator + +PREHOOK: query: SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c +FROM alltypesorc +WHERE (ctinyint IS NULL) +ORDER BY ctinyint, cdouble, cint, c +LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c +FROM alltypesorc +WHERE (ctinyint IS NULL) +ORDER BY ctinyint, cdouble, cint, c +LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +NULL NULL -1015272448 0.0 +NULL NULL -609074876 0.0 +NULL NULL -700300206 0.0 +NULL NULL -726473298 0.0 +NULL NULL -738747840 0.0 +NULL NULL -838810013 0.0 +NULL NULL -850295959 0.0 +NULL NULL -886426182 0.0 +NULL NULL -899422227 0.0 +NULL NULL -971543377 0.0 +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c +FROM alltypesorc +WHERE (cfloat IS NULL AND cbigint IS NULL) +ORDER BY cfloat, cbigint, c +LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c +FROM alltypesorc +WHERE (cfloat IS NULL AND cbigint IS NULL) +ORDER BY cfloat, cbigint, c +LIMIT 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 4) -> boolean, SelectColumnIsNull(col 3) -> boolean) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: 
false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + selectExpressions: ConstantVectorExpression(val null) -> 0:float, ConstantVectorExpression(val null) -> 1:bigint, ConstantVectorExpression(val 0.0) -> 2:double + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + + Stage: Stage-0 + Fetch Operator + +PREHOOK: query: SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c +FROM alltypesorc +WHERE (cfloat IS NULL AND cbigint IS NULL) +ORDER BY cfloat, cbigint, c +LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c +FROM alltypesorc +WHERE (cfloat IS NULL AND cbigint IS NULL) +ORDER BY cfloat, cbigint, c +LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +NULL NULL 0.0 +NULL NULL 0.0 +NULL NULL 0.0 +NULL NULL 0.0 +NULL NULL 0.0 +NULL NULL 0.0 +NULL NULL 0.0 +NULL NULL 0.0 +NULL NULL 0.0 +NULL NULL 0.0 +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c +FROM alltypesorc +WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL +ORDER BY ctimestamp1, ctimestamp2, c +LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c +FROM alltypesorc +WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL +ORDER BY ctimestamp1, ctimestamp2, c +LIMIT 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 9) -> boolean) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: VectorCoalesce(columns [8, 9])(children: col 8, col 9) -> 12:timestamp + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + + Stage: Stage-0 + Fetch Operator + +PREHOOK: query: SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c +FROM alltypesorc +WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL +ORDER BY ctimestamp1, ctimestamp2, c +LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c +FROM alltypesorc +WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL +ORDER BY ctimestamp1, ctimestamp2, c +LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +NULL 1969-12-31 15:59:30.929 1969-12-31 15:59:30.929 +NULL 1969-12-31 15:59:30.929 1969-12-31 15:59:30.929 +NULL 1969-12-31 15:59:30.929 1969-12-31 15:59:30.929 +NULL 1969-12-31 15:59:43.63 1969-12-31 15:59:43.63 +NULL 1969-12-31 15:59:43.658 1969-12-31 15:59:43.658 +NULL 1969-12-31 15:59:43.672 1969-12-31 15:59:43.672 +NULL 1969-12-31 15:59:43.684 1969-12-31 15:59:43.684 +NULL 1969-12-31 15:59:43.703 1969-12-31 15:59:43.703 +NULL 1969-12-31 15:59:43.704 1969-12-31 15:59:43.704 +NULL 1969-12-31 15:59:43.709 1969-12-31 15:59:43.709 +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c +FROM alltypesorc +WHERE (cfloat IS NULL AND cbigint IS NULL) +ORDER BY cfloat, cbigint, c +LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c +FROM alltypesorc +WHERE (cfloat IS NULL AND cbigint IS NULL) +ORDER BY cfloat, cbigint, c +LIMIT 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 4) -> boolean, SelectColumnIsNull(col 3) -> boolean) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + 
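Note on the VectorCoalesce expressions in the plans above: per row, the expression returns the value of the first child column that is not null, and yields null only when every input is null (as the all-NULL result rows in these tests confirm). A minimal row-wise sketch of that rule, using hypothetical names and plain Java arrays in place of Hive's ColumnVector buffers — an illustration, not the Hive class itself:

// Illustrative only: row-wise COALESCE over parallel nullable arrays,
// mirroring the per-row rule VectorCoalesce applies to each batch.
public final class CoalesceSketch {
  /** Returns the first non-null value among the candidate columns for row i. */
  static Long coalesceRow(int i, long[][] vectors, boolean[][] isNull) {
    for (int c = 0; c < vectors.length; c++) {
      if (!isNull[c][i]) {
        return vectors[c][i];
      }
    }
    return null; // every input null => result null
  }

  public static void main(String[] args) {
    long[][] vectors = { {0L, 7L}, {5L, 9L} };
    boolean[][] isNull = { {true, false}, {false, false} };
    System.out.println(coalesceRow(0, vectors, isNull)); // 5 (first column is null)
    System.out.println(coalesceRow(1, vectors, isNull)); // 7
  }
}

The real expression evaluates batch-at-a-time with isRepeating/noNulls fast paths; only the per-row selection rule is shown here.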
Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + selectExpressions: ConstantVectorExpression(val null) -> 0:float, ConstantVectorExpression(val null) -> 1:bigint, ConstantVectorExpression(val null) -> 2:float + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + + Stage: Stage-0 + Fetch Operator + +PREHOOK: query: SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c +FROM alltypesorc +WHERE (cfloat IS NULL AND cbigint IS NULL) +ORDER BY cfloat, cbigint, c +LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c +FROM alltypesorc +WHERE (cfloat IS NULL AND cbigint IS NULL) +ORDER BY cfloat, cbigint, c +LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c +FROM alltypesorc +WHERE cbigint IS NULL +LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c +FROM alltypesorc +WHERE cbigint IS NULL +LIMIT 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNull(col 3) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 0, 14] + selectExpressions: ConstantVectorExpression(val null) -> 12:bigint, VectorCoalesce(columns [13, 0])(children: ConstantVectorExpression(val null) -> 13:tinyint, col 0) -> 14:tinyint + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + +PREHOOK: query: SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c +FROM 
alltypesorc +WHERE cbigint IS NULL +LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c +FROM alltypesorc +WHERE cbigint IS NULL +LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +NULL -11 -11 +NULL -28 -28 +NULL -34 -34 +NULL -50 -50 +NULL 27 27 +NULL 29 29 +NULL 31 31 +NULL 31 31 +NULL 61 61 +NULL NULL NULL diff --git ql/src/test/results/clientpositive/tez/vector_coalesce_2.q.out ql/src/test/results/clientpositive/tez/vector_coalesce_2.q.out new file mode 100644 index 0000000..d38f8ae --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_coalesce_2.q.out @@ -0,0 +1,393 @@ +PREHOOK: query: create table str_str_orc (str1 string, str2 string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@str_str_orc +POSTHOOK: query: create table str_str_orc (str1 string, str2 string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@str_str_orc +PREHOOK: query: insert into table str_str_orc values (null, "X"), ("0", "X"), ("1", "X"), (null, "y") +PREHOOK: type: QUERY +PREHOOK: Output: default@str_str_orc +POSTHOOK: query: insert into table str_str_orc values (null, "X"), ("0", "X"), ("1", "X"), (null, "y") +POSTHOOK: type: QUERY +POSTHOOK: Output: default@str_str_orc +POSTHOOK: Lineage: str_str_orc.str1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: str_str_orc.str2 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT + str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result +from str_str_orc +GROUP BY str2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT + str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result +from str_str_orc +GROUP BY str2 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: str_str_orc + Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + vectorOutput: 
false + native: false + projectedOutputColumns: null + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), round((UDFToDouble(_col1) / 60.0), 2) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result +from str_str_orc +GROUP BY str2 +PREHOOK: type: QUERY +PREHOOK: Input: default@str_str_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT + str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result +from str_str_orc +GROUP BY str2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@str_str_orc +#### A masked pattern was here #### +X 0.02 +y 0.0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT COALESCE(str1, 0) as result +from str_str_orc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT COALESCE(str1, 0) as result +from str_str_orc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: str_str_orc + Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: COALESCE(str1,0) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT COALESCE(str1, 0) as result +from str_str_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@str_str_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT COALESCE(str1, 0) as result +from str_str_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@str_str_orc +#### A masked pattern was here #### +0 +0 +1 +0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT + str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result +from str_str_orc +GROUP BY str2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT + str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result +from str_str_orc +GROUP BY str2 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was 
here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: str_str_orc + Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 4] + selectExpressions: VectorUDFAdaptor(UDFToInteger(COALESCE(str1,0)))(children: VectorCoalesce(columns [0, 2])(children: col 0, ConstantVectorExpression(val 0) -> 2:string) -> 3:string) -> 4:int + Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 4) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1 + native: false + projectedOutputColumns: [0] + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), round((UDFToDouble(_col1) / 60.0), 2) (type: double) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 3, decimalPlaces 2)(children: DoubleColDivideDoubleScalar(col 2, val 60.0)(children: CastLongToDouble(col 1) -> 2:double) -> 3:double) -> 2:double + Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
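Note on usesVectorUDFAdaptor: true in the plan above: UDFToInteger(COALESCE(str1,0)) could not be compiled into a native vector expression, so the planner bridges the row-mode UDF into the vectorized pipeline with an adaptor. Schematically — hypothetical names, not the actual VectorUDFAdaptor code:

// Schematic of the adaptor idea: run a row-mode function over a batch by
// looping rows and writing results into an output column array.
import java.util.function.LongUnaryOperator;

public final class AdaptorSketch {
  static void evaluate(long[] in, long[] out, int n, LongUnaryOperator rowUdf) {
    for (int i = 0; i < n; i++) {       // row-at-a-time inner loop
      out[i] = rowUdf.applyAsLong(in[i]);
    }
  }

  public static void main(String[] args) {
    long[] in = {1, 2, 3};
    long[] out = new long[3];
    evaluate(in, out, 3, v -> v * 10);  // stand-in for a scalar UDF
    System.out.println(java.util.Arrays.toString(out)); // [10, 20, 30]
  }
}

This keeps the query vectorized end to end, at the cost of a scalar inner loop for the wrapped function — which is why the summary distinguishes allNative from usesVectorUDFAdaptor.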
File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result +from str_str_orc +GROUP BY str2 +PREHOOK: type: QUERY +PREHOOK: Input: default@str_str_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT + str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result +from str_str_orc +GROUP BY str2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@str_str_orc +#### A masked pattern was here #### +X 0.02 +y 0.0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT COALESCE(str1, 0) as result +from str_str_orc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT COALESCE(str1, 0) as result +from str_str_orc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: str_str_orc + Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: COALESCE(str1,0) (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + selectExpressions: VectorCoalesce(columns [0, 2])(children: col 0, ConstantVectorExpression(val 0) -> 2:string) -> 3:string + Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT COALESCE(str1, 0) as result +from str_str_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@str_str_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT COALESCE(str1, 0) as result +from str_str_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@str_str_orc +#### A masked pattern was here #### +0 +0 +1 +0 diff --git ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out index 8006a90..2b9882e 100644 --- ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out +++ ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out @@ -95,9 +95,9 
@@ POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@char_tbl2 gpa=3 gpa=3.5 -PREHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) PREHOOK: type: QUERY -POSTHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) POSTHOOK: type: QUERY Plan optimized by CBO. diff --git ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out index 4535e66..7939a8a 100644 --- ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out +++ ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out @@ -27,10 +27,14 @@ POSTHOOK: query: SHOW PARTITIONS non_string_part POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@non_string_part ctinyint=__HIVE_DEFAULT_PARTITION__ -PREHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -48,32 +52,73 @@ STAGE PLANS: TableScan alias: non_string_part Statistics: Num rows: 3073 Data size: 351442 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean predicate: (cint > 0) (type: boolean) Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: cint (type: int), ctinyint (type: tinyint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 4] Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: tinyint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -106,10 +151,14 @@ POSTHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ 799471 NULL 1248059 NULL 1286921 NULL -PREHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -127,31 +176,72 @@ STAGE PLANS: TableScan alias: non_string_part Statistics: Num rows: 3073 Data size: 363734 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean predicate: (cint > 0) (type: boolean) Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cstring1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num 
rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 10 Data size: 1180 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 10 Data size: 1180 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/tez/vectorization_div0.q.out ql/src/test/results/clientpositive/tez/vectorization_div0.q.out index 3c017e6..4454f95 100644 --- ql/src/test/results/clientpositive/tez/vectorization_div0.q.out +++ ql/src/test/results/clientpositive/tez/vectorization_div0.q.out @@ -154,32 +154,74 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 146792 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3, val 0) -> boolean, FilterLongColLessLongScalar(col 3, val 100000000) -> boolean) -> boolean predicate: ((cbigint > 0) and (cbigint < 100000000)) (type: boolean) Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21)) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 15, 17] + selectExpressions: LongColSubtractLongScalar(col 3, val 988888) -> 12:long, DoubleColDivideDoubleColumn(col 5, col 14)(children: CastLongToDouble(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 14:double) -> 15:double, DecimalScalarDivideDecimalColumn(val 1.2, col 16)(children: CastLongToDecimal(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 16:decimal(19,0)) -> 17:decimal(22,21) Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(22,21)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: decimal(22,21)) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 100 Data size: 12800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 100 Data size: 12800 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -327,32 +369,74 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 146792 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -500.0) -> boolean, FilterDoubleColLessDoubleScalar(col 5, val -199.0) -> boolean) -> boolean predicate: ((cdouble >= -500.0) and (cdouble < -199.0)) (type: boolean) Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double) outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 15, 16, 14, 17] + selectExpressions: DoubleColAddDoubleScalar(col 5, val 200.0) -> 12:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: CastLongToDouble(col 3) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 15:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 16:double, 
DoubleScalarDivideDoubleColumn(val 3.0, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 14:double, DoubleScalarDivideDoubleColumn(val 1.2, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 17:double Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 1, 3, 4] Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 100 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 100 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/tez/vectorization_limit.q.out ql/src/test/results/clientpositive/tez/vectorization_limit.q.out index dfb0102..071c6d9 100644 --- ql/src/test/results/clientpositive/tez/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/tez/vectorization_limit.q.out @@ -1,26 +1,58 @@ WARNING: Comparing a bigint and a double may result in a loss of precision. 
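The warning above arises because a Java double carries only 53 significant bits, so bigint values beyond 2^53 cannot all be represented exactly; when the bigint is promoted to double for the comparison, distinct values can compare equal. A self-contained demonstration:

// Why Hive warns on bigint-vs-double comparisons: 2^53 + 1 is the first
// long that a double cannot represent, so the promotion loses the low bit.
public final class PrecisionSketch {
  public static void main(String[] args) {
    long a = (1L << 53) + 1;           // 9007199254740993
    double d = (double) a;             // rounds to 9007199254740992.0
    System.out.println(d == a);        // true: a is promoted and the bit is lost
    System.out.println((long) d - a);  // -1: the values actually differ
  }
}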
-PREHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 183488 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean) + Statistics: Num rows: 1365 Data size: 20400 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cbigint (type: bigint), cdouble (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 7 + Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 7 Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean) - Select Operator - expressions: cbigint (type: bigint), cdouble (type: double) - outputColumnNames: _col0, _col1 - Limit - Number of rows: 7 - ListSink + ListSink WARNING: Comparing a bigint and a double may result in a loss of precision. 
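The VectorLimitOperator entries in these plans enforce LIMIT by trimming whole batches rather than testing rows one at a time. A simplified sketch of that counting logic — a hypothetical class, not Hive's implementation:

// Simplified LIMIT over batches: forward full batches until the remaining
// quota is smaller than the batch, then truncate, then forward nothing.
public final class LimitSketch {
  private long remaining;
  LimitSketch(long limit) { this.remaining = limit; }

  /** Returns how many rows of this batch to forward downstream. */
  int process(int batchSize) {
    int take = (int) Math.min(batchSize, remaining);
    remaining -= take;
    return take;
  }

  public static void main(String[] args) {
    LimitSketch limit = new LimitSketch(7);
    System.out.println(limit.process(5)); // 5
    System.out.println(limit.process(5)); // 2 -> batch truncated
    System.out.println(limit.process(5)); // 0 -> nothing forwarded
  }
}

Because the operator only adjusts batch sizes, it stays native: true in the plans, unlike the reduce-sink and file-sink operators around it.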
PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 @@ -61,32 +93,73 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 146796 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: ctinyint is not null (type: boolean) Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1] Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col2 (type: smallint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -150,12 +223,28 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint), (cdouble + 1.0) (type: double) outputColumnNames: _col0, _col1 + Select Vectorization: + className: 
VectorSelectOperator + native: true + projectedOutputColumns: [0, 12] + selectExpressions: DoubleColAddDoubleScalar(col 5, val 1.0) -> 12:double Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(_col1) + Group By Vectorization: + aggregators: VectorUDAFAvgDouble(col 12) -> struct + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDouble(col 12) -> struct output type STRUCT requires PRIMITIVE IS false keys: _col0 (type: tinyint) mode: hash outputColumnNames: _col0, _col1 @@ -168,7 +257,20 @@ STAGE PLANS: TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0) @@ -244,11 +346,24 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: ctinyint (type: tinyint) mode: hash outputColumnNames: _col0 @@ -257,22 +372,54 @@ STAGE PLANS: key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false Statistics: Num rows: 95 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 95 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -313,12 +460,16 @@ NULL -48 -47 -46 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -336,11 +487,24 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double) outputColumnNames: ctinyint, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5] Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 5 + native: false + projectedOutputColumns: [] keys: ctinyint (type: tinyint), cdouble (type: double) mode: hash outputColumnNames: _col0, _col1 @@ -349,27 +513,66 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: tinyint), KEY._col1 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -458,15 +661,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: ctinyint is not null (type: boolean) Statistics: Num rows: 9173 Data size: 82188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), ctinyint (type: tinyint) outputColumnNames: cdouble, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 0] Statistics: Num rows: 9173 Data size: 82188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 5 + native: false + projectedOutputColumns: [0] keys: cdouble (type: double) mode: hash outputColumnNames: _col0, _col1 @@ -475,14 +696,40 @@ STAGE PLANS: key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 @@ -490,20 +737,42 @@ STAGE PLANS: Reduce Output Operator key expressions: _col1 (type: bigint), _col0 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/vector_aggregate_9.q.out ql/src/test/results/clientpositive/vector_aggregate_9.q.out index 6dc5479..3ad29ef 100644 --- ql/src/test/results/clientpositive/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/vector_aggregate_9.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE 
DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -118,12 +122,26 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: dc (type: decimal(38,18)) outputColumnNames: dc + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [6] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(dc), max(dc), sum(dc), avg(dc) + Group By Vectorization: + aggregators: VectorUDAFMinDecimal(col 6) -> decimal(38,18), VectorUDAFMaxDecimal(col 6) -> decimal(38,18), VectorUDAFSumDecimal(col 6) -> decimal(38,18), VectorUDAFAvgDecimal(col 6) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 6) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE @@ -132,9 +150,25 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out index 2f9fff7..01b0fb7 100644 --- ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out +++ ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out @@ -31,10 +31,14 @@ POSTHOOK: Output: default@testvec POSTHOOK: Lineage: testvec.dt EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] POSTHOOK: Lineage: testvec.greg_dt SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] POSTHOOK: Lineage: testvec.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain select max(dt), max(greg_dt) from testvec where id=5 +PREHOOK: query: explain vectorization select max(dt), max(greg_dt) from testvec where id=5 PREHOOK: type: QUERY -POSTHOOK: query: explain select max(dt), max(greg_dt) from testvec where id=5 +POSTHOOK: query: explain vectorization select max(dt), max(greg_dt) from testvec 
where id=5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -63,6 +67,18 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), max(VALUE._col1) diff --git ql/src/test/results/clientpositive/vector_between_columns.q.out ql/src/test/results/clientpositive/vector_between_columns.q.out index d38dfec..cf50a0f 100644 --- ql/src/test/results/clientpositive/vector_between_columns.q.out +++ ql/src/test/results/clientpositive/vector_between_columns.q.out @@ -61,13 +61,17 @@ POSTHOOK: Lineage: tint.cint SIMPLE [(tint_txt)tint_txt.FieldSchema(name:cint, t POSTHOOK: Lineage: tint.rnum SIMPLE [(tint_txt)tint_txt.FieldSchema(name:rnum, type:int, comment:null), ] tint_txt.rnum tint_txt.cint Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -100,9 +104,16 @@ STAGE PLANS: TableScan alias: tsint Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: rnum (type: int), csint (type: smallint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -110,20 +121,41 @@ STAGE PLANS: keys: 0 1 + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), 
CASE WHEN (_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1, 3, 5] + selectExpressions: VectorUDFAdaptor(CASE WHEN (_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END)(children: VectorUDFAdaptor(_col1 BETWEEN _col3 AND _col3) -> 4:boolean) -> 5:string Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Local Work: Map Reduce Local Work @@ -171,13 +203,17 @@ tint.rnum tsint.rnum tint.cint tsint.csint between_col 4 3 10 1 NoOk 4 4 10 10 Ok Warning: Map Join MAPJOIN[10][bigTable=?] in task 'Stage-3:MAPRED' is a cross product -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -210,9 +246,16 @@ STAGE PLANS: TableScan alias: tsint Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: rnum (type: int), csint (type: smallint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -220,23 +263,47 @@ STAGE PLANS: keys: 0 1 + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 4)(children: VectorUDFAdaptor(_col1 BETWEEN _col3 AND _col3) -> 4:boolean) -> boolean predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean) Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: 
NONE Select Operator expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1, 3] Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out index dead5a6..bb16b26 100644 --- ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out @@ -95,14 +95,18 @@ POSTHOOK: Lineage: hundredorc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type: POSTHOOK: Lineage: hundredorc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: hundredorc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: hundredorc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT sum(hash(*)) FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT sum(hash(*)) FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -138,12 +142,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 10) -> boolean predicate: bin is not null (type: boolean) Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -151,27 +166,64 @@ STAGE PLANS: keys: 0 _col10 (type: binary) 1 _col10 
(type: binary) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21) (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [22] + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21)) -> 22:int Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 22) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -200,16 +252,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@hundredorc #### A masked pattern was here #### -27832781952 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT count(*), bin FROM hundredorc GROUP BY bin PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT count(*), bin FROM hundredorc GROUP BY bin POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled 
IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -221,12 +277,26 @@ STAGE PLANS: TableScan alias: hundredorc Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Select Operator expressions: bin (type: binary) outputColumnNames: bin + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10] Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 10 + native: false + projectedOutputColumns: [0] keys: bin (type: binary) mode: hash outputColumnNames: _col0, _col1 @@ -235,12 +305,33 @@ STAGE PLANS: key expressions: _col0 (type: binary) sort order: + Map-reduce partition columns: _col0 (type: binary) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: binary) mode: mergepartial outputColumnNames: _col0, _col1 @@ -301,14 +392,18 @@ POSTHOOK: Input: default@hundredorc 3 xylophone band 2 yard duty 3 zync studies -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t1.i, t1.bin, t2.bin FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t1.i, t1.bin, t2.bin FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -344,12 +439,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: i is not null (type: boolean) Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i (type: 
int), bin (type: binary) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 10] Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -357,20 +463,40 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_bround.q.out ql/src/test/results/clientpositive/vector_bround.q.out index 35c8b30..3191f11 100644 --- ql/src/test/results/clientpositive/vector_bround.q.out +++ ql/src/test/results/clientpositive/vector_bround.q.out @@ -32,10 +32,14 @@ POSTHOOK: type: QUERY POSTHOOK: Output: default@test_vector_bround POSTHOOK: Lineage: test_vector_bround.v0 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_vector_bround.v1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain select bround(v0), bround(v1, 1) from test_vector_bround +PREHOOK: query: explain vectorization select bround(v0), bround(v1, 1) from test_vector_bround PREHOOK: type: QUERY -POSTHOOK: query: explain select bround(v0), bround(v1, 1) from test_vector_bround +POSTHOOK: query: explain vectorization select bround(v0), bround(v1, 1) from test_vector_bround POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -59,6 +63,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_bucket.q.out ql/src/test/results/clientpositive/vector_bucket.q.out index 0f41aa6..2825489 100644 --- ql/src/test/results/clientpositive/vector_bucket.q.out +++ ql/src/test/results/clientpositive/vector_bucket.q.out @@ -6,12 +6,16 @@ POSTHOOK: query: CREATE TABLE non_orc_table(a INT, b STRING) CLUSTERED BY(a) INT POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@non_orc_table -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three') PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three') POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -33,6 +37,14 @@ STAGE PLANS: Map-reduce partition columns: UDFToInteger(_col0) (type: int) Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string) diff --git ql/src/test/results/clientpositive/vector_cast_constant.q.out ql/src/test/results/clientpositive/vector_cast_constant.q.out index 7a31b14..bf21732 100644 --- ql/src/test/results/clientpositive/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/vector_cast_constant.q.out @@ -95,20 +95,24 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` FROM over1korc GROUP BY i ORDER BY i LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` FROM over1korc GROUP BY i ORDER BY i LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -121,12 +125,27 @@ STAGE PLANS: TableScan alias: over1korc Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + TableScan 
Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Select Operator expressions: i (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(50), avg(50.0), avg(50) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct, VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 2 + native: false + projectedOutputColumns: [0, 1, 2] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct output type STRUCT requires PRIMITIVE IS false keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -139,9 +158,25 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 @@ -163,6 +198,14 @@ STAGE PLANS: Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) diff --git ql/src/test/results/clientpositive/vector_char_2.q.out ql/src/test/results/clientpositive/vector_char_2.q.out index 4aad1e2..7aa03da 100644 --- ql/src/test/results/clientpositive/vector_char_2.q.out +++ ql/src/test/results/clientpositive/vector_char_2.q.out @@ -47,18 +47,22 @@ val_10 10 1 val_100 200 2 val_103 206 2 val_104 208 2 -PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +PREHOOK: query: explain vectorization 
expression select value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value asc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +POSTHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value asc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -71,12 +75,27 @@ STAGE PLANS: TableScan alias: char_2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: value (type: char(20)), UDFToInteger(key) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2] + selectExpressions: VectorUDFAdaptor(UDFToInteger(key)) -> 2:int Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count() + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1 + native: false + projectedOutputColumns: [0, 1] keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -85,13 +104,34 @@ STAGE PLANS: key expressions: _col0 (type: char(20)) sort order: + Map-reduce partition columns: _col0 (type: char(20)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: char(20)) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -113,6 +153,14 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) @@ -177,18 +225,22 @@ val_97 194 2 val_96 96 1 val_95 190 2 val_92 92 1 -PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +PREHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value desc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +POSTHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value desc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -201,12 +253,27 @@ STAGE PLANS: TableScan alias: char_2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: value (type: char(20)), UDFToInteger(key) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2] + selectExpressions: VectorUDFAdaptor(UDFToInteger(key)) -> 2:int Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count() + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1 + native: false + projectedOutputColumns: [0, 1] keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -215,13 +282,34 @@ STAGE PLANS: key expressions: _col0 (type: char(20)) sort order: - Map-reduce partition columns: _col0 (type: char(20)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: char(20)) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -243,6 +331,14 @@ 
STAGE PLANS: Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) diff --git ql/src/test/results/clientpositive/vector_char_4.q.out ql/src/test/results/clientpositive/vector_char_4.q.out index 58988bf..1c58fd2 100644 --- ql/src/test/results/clientpositive/vector_char_4.q.out +++ ql/src/test/results/clientpositive/vector_char_4.q.out @@ -121,12 +121,16 @@ POSTHOOK: query: create table char_lazy_binary_columnar(ct char(10), csi char(10 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@char_lazy_binary_columnar -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -144,12 +148,23 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] + selectExpressions: CastLongToChar(col 0, maxLength 10) -> 13:Char, CastLongToChar(col 1, maxLength 10) -> 14:Char, CastLongToChar(col 2, maxLength 20) -> 15:Char, CastLongToChar(col 3, maxLength 30) -> 16:Char, VectorUDFAdaptor(CAST( f AS CHAR(20)) -> 17:char(20), VectorUDFAdaptor(CAST( d AS CHAR(20)) -> 18:char(20), CastStringGroupToChar(col 8, maxLength 50) -> 19:Char Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -157,6 +172,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.char_lazy_binary_columnar Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-7 Conditional Operator diff --git ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out index c3b7d23..34b24de 100644 --- ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out +++ ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out @@ -124,10 +124,14 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@char_join1_str_orc POSTHOOK: Lineage: char_join1_str_orc.c1 SIMPLE [(char_join1_str)char_join1_str.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: char_join1_str_orc.c2 SIMPLE [(char_join1_str)char_join1_str.FieldSchema(name:c2, type:string, comment:null), ] -PREHOOK: query: explain select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +PREHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY -POSTHOOK: query: explain select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -163,12 +167,23 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is not null (type: boolean) Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: char(10)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -176,16 +191,38 @@ STAGE PLANS: keys: 0 _col1 (type: char(10)) 1 _col1 (type: char(10)) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false, Uniform Hash IS false Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: char(10)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: char(10)) @@ -218,10 +255,14 @@ POSTHOOK: Input: default@char_join1_vc1_orc 2 abc 1 abc 2 abc 2 abc 3 abc 3 abc -PREHOOK: query: explain select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +PREHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY -POSTHOOK: query: explain select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -257,12 +298,23 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is not null (type: boolean) Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: char(20)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -270,16 +322,38 @@ STAGE PLANS: keys: 0 _col1 (type: char(20)) 1 _col1 (type: char(20)) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: char(20)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: char(20)) @@ -314,10 +388,14 @@ POSTHOOK: Input: default@char_join1_vc2_orc 2 abc 1 abc 2 abc 2 abc 3 abc 3 abc -PREHOOK: query: explain select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +PREHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY -POSTHOOK: query: explain select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -353,12 +431,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is not null (type: boolean) Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: char(10)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -366,16 +455,38 @@ STAGE PLANS: keys: 0 UDFToString(_col1) (type: string) 1 _col1 (type: string) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No 
buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: string) diff --git ql/src/test/results/clientpositive/vector_char_simple.q.out ql/src/test/results/clientpositive/vector_char_simple.q.out index ddbc4f0..777645f 100644 --- ql/src/test/results/clientpositive/vector_char_simple.q.out +++ ql/src/test/results/clientpositive/vector_char_simple.q.out @@ -45,16 +45,20 @@ POSTHOOK: Input: default@src 0 val_0 10 val_10 100 val_100 -PREHOOK: query: explain select key, value +PREHOOK: query: explain vectorization only select key, value from char_2 order by key asc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value +POSTHOOK: query: explain vectorization only select key, value from char_2 order by key asc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -62,42 +66,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: char_2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: char(10)), value (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(10)) - sort order: + - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: char(20)) Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + 
allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 Fetch Operator - limit: 5 - Processor Tree: - ListSink PREHOOK: query: select key, value from char_2 @@ -137,16 +121,20 @@ POSTHOOK: Input: default@src 97 val_97 97 val_97 96 val_96 -PREHOOK: query: explain select key, value +PREHOOK: query: explain vectorization only select key, value from char_2 order by key desc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value +POSTHOOK: query: explain vectorization only select key, value from char_2 order by key desc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -154,42 +142,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: char_2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: char(10)), value (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(10)) - sort order: - - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: char(20)) Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 Fetch Operator - limit: 5 - Processor Tree: - ListSink PREHOOK: query: select key, value from char_2 @@ -230,12 +198,16 @@ POSTHOOK: query: create table char_3 ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@char_3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only operator insert into table char_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only operator insert into table char_3 select cint from alltypesorc limit 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root 
stage Stage-0 depends on stages: Stage-1 @@ -245,55 +217,37 @@ STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: int) + TableScan Vectorization: + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Limit Vectorization: + className: VectorLimitOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: CAST( _col0 AS CHAR(12) (type: char(12)) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.char_3 Stage: Stage-0 - Move Operator - tables: - replace: false - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.char_3 Stage: Stage-2 - Stats-Aggr Operator PREHOOK: query: insert into table char_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_coalesce.q.out ql/src/test/results/clientpositive/vector_coalesce.q.out index 6344ddc..7addaad 100644 --- ql/src/test/results/clientpositive/vector_coalesce.q.out +++ ql/src/test/results/clientpositive/vector_coalesce.q.out @@ -1,15 +1,19 @@ -PREHOOK: query: EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION 
SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c FROM alltypesorc WHERE (cdouble IS NULL) ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c FROM alltypesorc WHERE (cdouble IS NULL) ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -18,43 +22,40 @@ STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 1045942 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: cdouble is null (type: boolean) - Statistics: Num rows: 3114 Data size: 265164 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring1 (type: string), cint (type: int), cfloat (type: float), csmallint (type: smallint), COALESCE(null,cstring1,cint,cfloat,csmallint) (type: string) - outputColumnNames: _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3114 Data size: 819540 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: smallint), _col5 (type: string) - sort order: +++++ - Statistics: Num rows: 3114 Data size: 819540 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNull(col 5) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [6, 2, 4, 1, 16] + selectExpressions: VectorCoalesce(columns [12, 6, 13, 14, 15])(children: ConstantVectorExpression(val null) -> 12:string, col 6, CastLongToString(col 2) -> 13:String, VectorUDFAdaptor(null(cfloat)) -> 14:string, CastLongToString(col 1) -> 15:String) -> 16:string + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: null (type: double), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: 
float), KEY.reducesinkkey3 (type: smallint), KEY.reducesinkkey4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3114 Data size: 246572 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c FROM alltypesorc @@ -82,18 +83,22 @@ NULL NULL -738306196 -51.0 NULL -738306196 NULL NULL -819152895 8.0 NULL -819152895 NULL NULL -827212561 8.0 NULL -827212561 NULL NULL -949587513 11.0 NULL -949587513 -PREHOOK: query: EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c FROM alltypesorc WHERE (ctinyint IS NULL) ORDER BY ctinyint, cdouble, cint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c FROM alltypesorc WHERE (ctinyint IS NULL) ORDER BY ctinyint, cdouble, cint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -102,43 +107,40 @@ STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 146792 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ctinyint is null (type: boolean) - Statistics: Num rows: 3115 Data size: 37224 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cdouble (type: double), cint (type: int), COALESCE(null,(cdouble + log2(cint)),0) (type: double) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 3115 Data size: 52844 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: double), _col2 (type: int), _col3 (type: double) - sort order: +++ - Statistics: Num rows: 3115 Data size: 52844 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNull(col 0) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 2, 15] + selectExpressions: VectorCoalesce(columns [12, 14, 13])(children: ConstantVectorExpression(val null) -> 12:double, DoubleColAddDoubleColumn(col 5, col 13)(children: FuncLog2LongToDouble(col 2) -> 13:double) -> 14:double, ConstantVectorExpression(val 0.0) -> 13:double) -> 15:double + Reduce Sink Vectorization: + className: 
VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: null (type: tinyint), KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3115 Data size: 27928 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c FROM alltypesorc @@ -166,18 +168,22 @@ NULL NULL -850295959 0.0 NULL NULL -886426182 0.0 NULL NULL -899422227 0.0 NULL NULL -971543377 0.0 -PREHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -186,40 +192,39 @@ STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 110088 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (cfloat is null and cbigint is null) (type: boolean) - Statistics: Num rows: 790 Data size: 7092 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter 
Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 4) -> boolean, SelectColumnIsNull(col 3) -> boolean) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: null (type: float), null (type: bigint), 0.0 (type: float) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc @@ -247,18 +252,22 @@ NULL NULL 0.0 NULL NULL 0.0 NULL NULL 0.0 NULL NULL 0.0 -PREHOOK: query: EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL ORDER BY ctimestamp1, ctimestamp2, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL ORDER BY ctimestamp1, ctimestamp2, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -267,43 +276,40 @@ STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 983040 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (ctimestamp1 is not null or ctimestamp2 is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 983040 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctimestamp1 (type: 
timestamp), ctimestamp2 (type: timestamp), COALESCE(ctimestamp1,ctimestamp2) (type: timestamp) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: timestamp) - sort order: +++ - Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 9) -> boolean) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: VectorCoalesce(columns [8, 9])(children: col 8, col 9) -> 12:timestamp + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: timestamp), KEY.reducesinkkey2 (type: timestamp) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc @@ -331,18 +337,22 @@ NULL 1969-12-31 15:59:43.684 1969-12-31 15:59:43.684 NULL 1969-12-31 15:59:43.703 1969-12-31 15:59:43.703 NULL 1969-12-31 15:59:43.704 1969-12-31 15:59:43.704 NULL 1969-12-31 15:59:43.709 1969-12-31 15:59:43.709 -PREHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY 
EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -351,40 +361,39 @@ STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 110088 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (cfloat is null and cbigint is null) (type: boolean) - Statistics: Num rows: 790 Data size: 7092 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 790 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 790 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 4) -> boolean, SelectColumnIsNull(col 3) -> boolean) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: null (type: float), null (type: bigint), null (type: float) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 790 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc @@ -412,16 +421,20 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c FROM alltypesorc WHERE cbigint IS NULL LIMIT 10 PREHOOK: 
type: QUERY -POSTHOOK: query: EXPLAIN SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c FROM alltypesorc WHERE cbigint IS NULL LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -430,33 +443,36 @@ STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 110088 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: cbigint is null (type: boolean) - Statistics: Num rows: 3115 Data size: 27912 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: null (type: bigint), ctinyint (type: tinyint), COALESCE(null,ctinyint) (type: tinyint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3115 Data size: 21772 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNull(col 3) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 0, 14] + selectExpressions: ConstantVectorExpression(val null) -> 12:bigint, VectorCoalesce(columns [13, 0])(children: ConstantVectorExpression(val null) -> 13:tinyint, col 0) -> 14:tinyint + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c FROM alltypesorc diff --git ql/src/test/results/clientpositive/vector_coalesce_2.q.out ql/src/test/results/clientpositive/vector_coalesce_2.q.out index 98b2259..6cdf7db 100644 --- ql/src/test/results/clientpositive/vector_coalesce_2.q.out +++ ql/src/test/results/clientpositive/vector_coalesce_2.q.out @@ -14,18 +14,22 @@ POSTHOOK: type: QUERY POSTHOOK: Output: default@str_str_orc POSTHOOK: Lineage: str_str_orc.str1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: str_str_orc.str2 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc GROUP BY str2 PREHOOK: type: QUERY 
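
The hunks in these q.out files replace plain EXPLAIN with the new EXPLAIN VECTORIZATION forms (EXPRESSION, ONLY, ONLY OPERATOR, ONLY EXPRESSION). A minimal HiveQL sketch of the two ends of that spectrum, reusing tables from these very tests; the SET line is an assumed session setting, not part of the patch:

SET hive.vectorized.execution.enabled = true;

-- Emits the full plan plus a PLAN VECTORIZATION header and per-operator
-- "... Vectorization:" blocks with expression-level detail. With the flag
-- off, the header instead reports
-- enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false],
-- as in the first two str_str_orc plans.
EXPLAIN VECTORIZATION EXPRESSION
SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result
FROM str_str_orc
GROUP BY str2;

-- ONLY suppresses the non-vectorization parts of the plan, which is why the
-- vector_char_simple.q.out hunks delete most of the operator tree from the
-- expected output.
EXPLAIN VECTORIZATION ONLY OPERATOR
SELECT key, value FROM char_2 ORDER BY key ASC LIMIT 5;

Expressions with no native vectorized implementation surface as VectorUDFAdaptor(...) in the expression detail and flip usesVectorUDFAdaptor to true, as in the UDFToInteger(COALESCE(str1,0)) hunk below.
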
-POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc GROUP BY str2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -43,6 +47,10 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -56,6 +64,10 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 @@ -94,14 +106,18 @@ POSTHOOK: Input: default@str_str_orc #### A masked pattern was here #### X 0.02 y 0.0 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COALESCE(str1, 0) as result from str_str_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COALESCE(str1, 0) as result from str_str_orc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -145,18 +161,22 @@ POSTHOOK: Input: default@str_str_orc 0 1 0 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc GROUP BY str2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc GROUP BY str2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -168,12 +188,27 @@ STAGE PLANS: TableScan alias: str_str_orc Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 4] + selectExpressions: VectorUDFAdaptor(UDFToInteger(COALESCE(str1,0)))(children: VectorCoalesce(columns [0, 2])(children: col 0, ConstantVectorExpression(val 0) -> 2:string) -> 3:string) -> 4:int Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 4) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1 + native: false + projectedOutputColumns: [0] keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -182,12 +217,33 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 @@ -226,14 +282,18 @@ POSTHOOK: Input: default@str_str_orc #### A masked pattern was here #### X 0.02 y 0.0 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COALESCE(str1, 0) as result from str_str_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COALESCE(str1, 0) as result from str_str_orc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -245,18 +305,37 @@ STAGE PLANS: TableScan alias: str_str_orc Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: COALESCE(str1,0) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + selectExpressions: VectorCoalesce(columns [0, 2])(children: col 0, ConstantVectorExpression(val 0) -> 2:string) -> 3:string Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_complex_join.q.out ql/src/test/results/clientpositive/vector_complex_join.q.out index 92cb5d5..513c159 100644 --- ql/src/test/results/clientpositive/vector_complex_join.q.out +++ ql/src/test/results/clientpositive/vector_complex_join.q.out @@ -17,13 +17,17 @@ POSTHOOK: Output: default@test POSTHOOK: Lineage: test.a SIMPLE [] POSTHOOK: Lineage: test.b EXPRESSION [] _c0 
_c1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from alltypesorc join test where alltypesorc.cint=test.a PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from alltypesorc join test where alltypesorc.cint=test.a POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -81,6 +85,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Small Table expression for MAPJOIN operator: Data type map of Column[_col1] not supported + vectorized: false Local Work: Map Reduce Local Work @@ -136,13 +146,17 @@ POSTHOOK: type: QUERY POSTHOOK: Output: default@test2b POSTHOOK: Lineage: test2b.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from test2b join test2a on test2b.a = test2a.a[1] PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from test2b join test2a on test2b.a = test2a.a[1] POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -196,6 +210,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: Data type array of Column[a] not supported + vectorized: false Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_count.q.out ql/src/test/results/clientpositive/vector_count.q.out index e829ad8..2bdaa68 100644 --- ql/src/test/results/clientpositive/vector_count.q.out +++ ql/src/test/results/clientpositive/vector_count.q.out @@ -43,10 +43,14 @@ POSTHOOK: Input: default@abcd 12 100 75 7 12 NULL 80 2 NULL 35 23 6 -PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +PREHOOK: query: explain vectorization expression select a, count(distinct b), count(distinct c), sum(d) from abcd group by a PREHOOK: type: QUERY -POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: query: explain vectorization expression select a, count(distinct b), count(distinct c), sum(d) from abcd group by a POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -58,12 +62,26 @@ STAGE PLANS: TableScan alias: abcd Statistics: Num rows: 7 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: a (type: int), b (type: int), c (type: int), d (type: int) outputColumnNames: a, b, c, d + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT b), count(DISTINCT c), sum(d) + Group By Vectorization: + aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFSumLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1, col 2 + native: false + projectedOutputColumns: [0, 1, 2] keys: a (type: int), b (type: int), c (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -72,12 +90,33 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col2) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 @@ -108,10 +147,14 @@ POSTHOOK: Input: default@abcd 100 1 1 3 12 1 2 9 NULL 1 1 6 -PREHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: query: explain vectorization expression select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd PREHOOK: type: QUERY -POSTHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct 
b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: query: explain vectorization expression select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -123,12 +166,26 @@ STAGE PLANS: TableScan alias: abcd Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: a (type: int), b (type: int), c (type: int), d (type: int) outputColumnNames: _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1), count(), count(_col1), count(_col2), count(_col3), count(_col4), count(DISTINCT _col1), count(DISTINCT _col2), count(DISTINCT _col3), count(DISTINCT _col4), count(DISTINCT _col1, _col2), count(DISTINCT _col2, _col3), count(DISTINCT _col3, _col4), count(DISTINCT _col1, _col4), count(DISTINCT _col1, _col3), count(DISTINCT _col2, _col4), count(DISTINCT _col1, _col2, _col3), count(DISTINCT _col2, _col3, _col4), count(DISTINCT _col1, _col3, _col4), count(DISTINCT _col1, _col2, _col4), count(DISTINCT _col1, _col2, _col3, _col4) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 4:long) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1, col 2, col 3 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] keys: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 @@ -136,12 +193,33 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) sort order: ++++ + Reduce Sink 
Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE @@ -168,10 +246,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@abcd #### A masked pattern was here #### 7 7 6 6 6 7 3 3 6 7 4 5 6 6 5 6 4 5 5 5 4 -PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +PREHOOK: query: explain vectorization expression select a, count(distinct b), count(distinct c), sum(d) from abcd group by a PREHOOK: type: QUERY -POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: query: explain vectorization expression select a, count(distinct b), count(distinct c), sum(d) from abcd group by a POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -183,20 +265,48 @@ STAGE PLANS: TableScan alias: abcd Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: 
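
Across all of these plans the per-operator blocks carry native: true/false plus the nativeConditionsMet/nativeConditionsNotMet lists, and every Reduce Vectorization section is disabled for the same reason: the CLI driver runs on MapReduce, so "hive.execution.engine mr IN [tez, spark] IS false". A hedged sketch of the settings under which the reduce side could vectorize instead, assuming a Tez-capable session (both SET lines are assumptions, not part of the patch):

SET hive.vectorized.execution.reduce.enabled = true;  -- already met in these runs
SET hive.execution.engine = tez;                      -- the condition MR fails

-- With the engine condition satisfied, the Reduce Vectorization section can
-- report enabled: true rather than falling back to row-mode reducers; other
-- per-operator conditions (e.g. "No DISTINCT columns" for a native
-- ReduceSink) still apply independently.
EXPLAIN VECTORIZATION ONLY
SELECT a, count(DISTINCT b), count(DISTINCT c), sum(d)
FROM abcd
GROUP BY a;
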
+ native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: a (type: int), b (type: int), c (type: int), d (type: int) outputColumnNames: a, b, c, d + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: a (type: int), b (type: int), c (type: int) sort order: +++ Map-reduce partition columns: a (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: d (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 @@ -227,10 +337,14 @@ POSTHOOK: Input: default@abcd 100 1 1 3 12 1 2 9 NULL 1 1 6 -PREHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: query: explain vectorization expression select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd PREHOOK: type: QUERY -POSTHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: query: explain vectorization expression select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), 
count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -242,18 +356,46 @@ STAGE PLANS: TableScan alias: abcd Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: a (type: int), b (type: int), c (type: int), d (type: int) outputColumnNames: _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) sort order: ++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(1), count(), count(KEY._col0:0._col0), count(KEY._col0:1._col0), count(KEY._col0:2._col0), count(KEY._col0:3._col0), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/vector_data_types.q.out 
ql/src/test/results/clientpositive/vector_data_types.q.out index 63e545f..b1be4d7 100644 --- ql/src/test/results/clientpositive/vector_data_types.q.out +++ ql/src/test/results/clientpositive/vector_data_types.q.out @@ -95,10 +95,14 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -181,10 +185,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@over1korc #### A masked pattern was here #### -17045922556 -PREHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -196,17 +204,41 @@ STAGE PLANS: TableScan alias: over1korc Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] 
IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: boolean), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: decimal(4,2)), VALUE._col7 (type: binary) diff --git ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out index 626ff87..29ce321 100644 --- ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out @@ -20,20 +20,24 @@ POSTHOOK: Lineage: decimal_vgby.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.F POSTHOOK: Lineage: decimal_vgby.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_vgby.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_vgby.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: EXPLAIN SELECT cint, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) FROM decimal_vgby GROUP BY cint HAVING COUNT(*) > 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) FROM decimal_vgby GROUP BY cint HAVING COUNT(*) > 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -45,12 +49,26 @@ STAGE PLANS: TableScan alias: decimal_vgby Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) outputColumnNames: cint, cdecimal1, cdecimal2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 1, 2] Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), count(cdecimal2), 
max(cdecimal2), min(cdecimal2), sum(cdecimal2), count() + Group By Vectorization: + aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 3 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 @@ -59,12 +77,33 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)), _col9 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), max(VALUE._col5), min(VALUE._col6), sum(VALUE._col7), count(VALUE._col8) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 @@ -116,20 +155,24 @@ POSTHOOK: Input: default@decimal_vgby 6981 3 5831542.2692483780 -515.6210729730 5830511.0271024320 3 6984454.21109769200000 -617.56077692307690 6983219.08954384584620 762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2 6984454.21109769200000 1833.94569230769250 6986288.15678999969250 NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360 -PREHOOK: query: EXPLAIN SELECT cint, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) FROM decimal_vgby GROUP BY cint HAVING COUNT(*) > 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, +POSTHOOK: query: 
EXPLAIN VECTORIZATION EXPRESSION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) FROM decimal_vgby GROUP BY cint HAVING COUNT(*) > 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -141,12 +184,27 @@ STAGE PLANS: TableScan alias: decimal_vgby Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) outputColumnNames: cint, cdecimal1, cdecimal2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 1, 2] Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count() + Group By Vectorization: + aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFAvgDecimal(col 1) -> struct, VectorUDAFStdPopDecimal(col 1) -> struct, VectorUDAFStdSampDecimal(col 1) -> struct, VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFAvgDecimal(col 2) -> struct, VectorUDAFStdPopDecimal(col 2) -> struct, VectorUDAFStdSampDecimal(col 2) -> struct, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 3 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 @@ -158,9 +216,25 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: 
decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 diff --git ql/src/test/results/clientpositive/vector_decimal_cast.q.out ql/src/test/results/clientpositive/vector_decimal_cast.q.out index aee5e02..8fb516a 100644 --- ql/src/test/results/clientpositive/vector_decimal_cast.q.out +++ ql/src/test/results/clientpositive/vector_decimal_cast.q.out @@ -1,7 +1,11 @@ -PREHOOK: query: EXPLAIN SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -13,24 +17,50 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: 
SelectColumnIsNotNull(col 5) -> boolean, SelectColumnIsNotNull(col 2) -> boolean, SelectColumnIsNotNull(col 10) -> boolean, SelectColumnIsNotNull(col 8) -> boolean) -> boolean predicate: (cdouble is not null and cint is not null and cboolean1 is not null and ctimestamp1 is not null) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double), cint (type: int), cboolean1 (type: boolean), ctimestamp1 (type: timestamp), CAST( cdouble AS decimal(20,10)) (type: decimal(20,10)), CAST( cint AS decimal(23,14)) (type: decimal(23,14)), CAST( cboolean1 AS decimal(5,2)) (type: decimal(5,2)), CAST( ctimestamp1 AS decimal(15,0)) (type: decimal(15,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 2, 10, 8, 12, 13, 14, 15] + selectExpressions: CastDoubleToDecimal(col 5) -> 12:decimal(20,10), CastLongToDecimal(col 2) -> 13:decimal(23,14), CastLongToDecimal(col 10) -> 14:decimal(5,2), CastTimestampToDecimal(col 8) -> 15:decimal(15,0) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_decimal_expressions.q.out ql/src/test/results/clientpositive/vector_decimal_expressions.q.out index 7e0c240..b33007e 100644 --- ql/src/test/results/clientpositive/vector_decimal_expressions.q.out +++ ql/src/test/results/clientpositive/vector_decimal_expressions.q.out @@ -11,14 +11,18 @@ POSTHOOK: Output: default@decimal_test POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] -PREHOOK: query: EXPLAIN SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT 
NULL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14 LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14 LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -30,19 +34,48 @@ STAGE PLANS: TableScan alias: decimal_test Statistics: Num rows: 12288 Data size: 2128368 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDecimalColGreaterDecimalScalar(col 1, val 0) -> boolean, FilterDecimalColLessDecimalScalar(col 1, val 12345.5678) -> boolean, FilterDecimalColNotEqualDecimalScalar(col 2, val 0) -> boolean, FilterDecimalColGreaterDecimalScalar(col 2, val 1000) -> boolean, SelectColumnIsNotNull(col 0) -> boolean) -> boolean predicate: ((cdecimal1 > 0) and (cdecimal1 < 12345.5678) and (cdecimal2 <> 0) and (cdecimal2 > 1000) and cdouble is not null) (type: boolean) Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (cdecimal1 + cdecimal2) (type: decimal(25,14)), (cdecimal1 - (2 * cdecimal2)) (type: decimal(26,14)), ((cdecimal1 + 2.34) / cdecimal2) (type: decimal(38,13)), (cdecimal1 * (cdecimal2 / 3.4)) (type: decimal(38,17)), (cdecimal1 % 10) (type: decimal(12,10)), UDFToInteger(cdecimal1) (type: int), 
UDFToShort(cdecimal2) (type: smallint), UDFToByte(cdecimal2) (type: tinyint), UDFToLong(cdecimal1) (type: bigint), UDFToBoolean(cdecimal1) (type: boolean), UDFToDouble(cdecimal2) (type: double), UDFToFloat(cdecimal1) (type: float), UDFToString(cdecimal2) (type: string), CAST( cdecimal1 AS TIMESTAMP) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + selectExpressions: DecimalColAddDecimalColumn(col 1, col 2) -> 3:decimal(25,14), DecimalColSubtractDecimalColumn(col 1, col 4)(children: DecimalScalarMultiplyDecimalColumn(val 2, col 2) -> 4:decimal(25,14)) -> 5:decimal(26,14), DecimalColDivideDecimalColumn(col 6, col 2)(children: DecimalColAddDecimalScalar(col 1, val 2.34) -> 6:decimal(21,10)) -> 7:decimal(38,13), DecimalColMultiplyDecimalColumn(col 1, col 8)(children: DecimalColDivideDecimalScalar(col 2, val 3.4) -> 8:decimal(27,17)) -> 9:decimal(38,17), DecimalColModuloDecimalScalar(col 1, val 10) -> 10:decimal(12,10), CastDecimalToLong(col 1) -> 11:int, CastDecimalToLong(col 2) -> 12:smallint, CastDecimalToLong(col 2) -> 13:tinyint, CastDecimalToLong(col 1) -> 14:bigint, CastDecimalToBoolean(col 1) -> 15:Boolean, CastDecimalToDouble(col 2) -> 16:double, CastDecimalToDouble(col 1) -> 17:double, CastDecimalToString(col 2) -> 18:String, CastDecimalToTimestamp(col 1) -> 19:timestamp Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(25,14)), _col1 (type: decimal(26,14)), _col2 (type: decimal(38,13)), _col3 (type: decimal(38,17)), _col4 (type: decimal(12,10)), _col5 (type: int), _col6 (type: smallint), _col7 (type: tinyint), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: double), _col11 (type: float), _col12 (type: string), _col13 (type: timestamp) sort order: ++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(25,14)), KEY.reducesinkkey1 (type: decimal(26,14)), KEY.reducesinkkey2 (type: decimal(38,13)), KEY.reducesinkkey3 (type: decimal(38,17)), KEY.reducesinkkey4 (type: decimal(12,10)), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: smallint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey9 (type: boolean), KEY.reducesinkkey10 
(type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: string), KEY.reducesinkkey13 (type: timestamp) diff --git ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out index 6f01b52..c3c0c4e 100644 --- ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out +++ ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out @@ -72,12 +72,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k POSTHOOK: Output: default@t2 POSTHOOK: Lineage: t2.dec EXPRESSION [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -113,12 +117,23 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1)(children: CastDecimalToBoolean(col 0) -> 1:Boolean) -> boolean predicate: dec is not null (type: boolean) Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dec (type: decimal(4,2)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -126,16 +141,33 @@ STAGE PLANS: keys: 0 _col0 (type: decimal(6,2)) 1 _col0 (type: decimal(6,2)) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Optimized Table and Supports Key Types IS false + nativeNotSupportedKeyTypes: DECIMAL outputColumnNames: _col0, _col1 Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_decimal_math_funcs.q.out ql/src/test/results/clientpositive/vector_decimal_math_funcs.q.out index 
94cfa50..bc063f4 100644 --- ql/src/test/results/clientpositive/vector_decimal_math_funcs.q.out +++ ql/src/test/results/clientpositive/vector_decimal_math_funcs.q.out @@ -12,7 +12,7 @@ POSTHOOK: Lineage: decimal_test.cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSc POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select cdecimal1 ,Round(cdecimal1, 2) @@ -49,7 +49,7 @@ where cbigint % 500 = 0 and sin(cdecimal1) >= -1.0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select cdecimal1 ,Round(cdecimal1, 2) @@ -86,6 +86,10 @@ where cbigint % 500 = 0 and sin(cdecimal1) >= -1.0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -97,21 +101,44 @@ STAGE PLANS: TableScan alias: decimal_test Statistics: Num rows: 12288 Data size: 2201752 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 4, val 0)(children: LongColModuloLongScalar(col 0, val 500) -> 4:long) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 6, val -1.0)(children: FuncSinDoubleToDouble(col 5)(children: CastDecimalToDouble(col 2) -> 5:double) -> 6:double) -> boolean) -> boolean predicate: (((cbigint % 500) = 0) and (sin(cdecimal1) >= -1.0)) (type: boolean) Statistics: Num rows: 2048 Data size: 366958 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdecimal1 (type: decimal(20,10)), round(cdecimal1, 2) (type: decimal(13,2)), round(cdecimal1) (type: decimal(11,0)), floor(cdecimal1) (type: decimal(11,0)), ceil(cdecimal1) (type: decimal(11,0)), round(exp(cdecimal1), 58) (type: double), ln(cdecimal1) (type: double), log10(cdecimal1) (type: double), log2(cdecimal1) (type: double), log2((cdecimal1 - 15601)) (type: double), log(2, cdecimal1) (type: double), power(log2(cdecimal1), 2) (type: double), power(log2(cdecimal1), 2) (type: double), sqrt(cdecimal1) (type: double), abs(cdecimal1) (type: decimal(20,10)), sin(cdecimal1) (type: double), asin(cdecimal1) (type: double), cos(cdecimal1) (type: double), acos(cdecimal1) (type: double), atan(cdecimal1) (type: double), degrees(cdecimal1) (type: double), radians(cdecimal1) (type: double), cdecimal1 (type: decimal(20,10)), (- cdecimal1) (type: decimal(20,10)), sign(cdecimal1) (type: int), cos(((- sin(log(cdecimal1))) + 3.14159)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 7, 8, 9, 10, 5, 11, 12, 13, 15, 6, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 2, 28, 4, 29] + selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 2, 
decimalPlaces 2) -> 7:decimal(13,2), FuncRoundDecimalToDecimal(col 2) -> 8:decimal(11,0), FuncFloorDecimalToDecimal(col 2) -> 9:decimal(11,0), FuncCeilDecimalToDecimal(col 2) -> 10:decimal(11,0), RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 58)(children: FuncExpDoubleToDouble(col 5)(children: CastDecimalToDouble(col 2) -> 5:double) -> 6:double) -> 5:double, FuncLnDoubleToDouble(col 6)(children: CastDecimalToDouble(col 2) -> 6:double) -> 11:double, FuncLog10DoubleToDouble(col 6)(children: CastDecimalToDouble(col 2) -> 6:double) -> 12:double, FuncLog2DoubleToDouble(col 6)(children: CastDecimalToDouble(col 2) -> 6:double) -> 13:double, FuncLog2DoubleToDouble(col 6)(children: CastDecimalToDouble(col 14)(children: DecimalColSubtractDecimalScalar(col 2, val 15601) -> 14:decimal(21,10)) -> 6:double) -> 15:double, VectorUDFAdaptor(log(2, cdecimal1)) -> 6:double, VectorUDFAdaptor(power(log2(cdecimal1), 2))(children: FuncLog2DoubleToDouble(col 16)(children: CastDecimalToDouble(col 2) -> 16:double) -> 17:double) -> 16:double, VectorUDFAdaptor(power(log2(cdecimal1), 2))(children: FuncLog2DoubleToDouble(col 17)(children: CastDecimalToDouble(col 2) -> 17:double) -> 18:double) -> 17:double, FuncSqrtDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 19:double, FuncAbsDecimalToDecimal(col 2) -> 20:decimal(20,10), FuncSinDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 21:double, FuncASinDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 22:double, FuncCosDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 23:double, FuncACosDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 24:double, FuncATanDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 25:double, FuncDegreesDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 26:double, FuncRadiansDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 27:double, FuncNegateDecimalToDecimal(col 2) -> 28:decimal(20,10), FuncSignDecimalToLong(col 2) -> 4:int, FuncCosDoubleToDouble(col 18)(children: DoubleColAddDoubleScalar(col 29, val 3.14159)(children: DoubleColUnaryMinus(col 18)(children: FuncSinDoubleToDouble(col 29)(children: FuncLnDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 29:double) -> 18:double) -> 29:double) -> 18:double) -> 29:double Statistics: Num rows: 2048 Data size: 366958 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2048 Data size: 366958 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_decimal_precision.q.out ql/src/test/results/clientpositive/vector_decimal_precision.q.out index 94b9657..d175ec4 100644 --- ql/src/test/results/clientpositive/vector_decimal_precision.q.out +++ 
ql/src/test/results/clientpositive/vector_decimal_precision.q.out @@ -545,10 +545,14 @@ NULL NULL 123456789.0123456789 15241578753238836.75019051998750191 1234567890.1234560000 1524157875323881726.87092138393600000 1234567890.1234567890 1524157875323883675.01905199875019052 -PREHOOK: query: EXPLAIN SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -560,12 +564,26 @@ STAGE PLANS: TableScan alias: decimal_precision Statistics: Num rows: 75 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: dec (type: decimal(20,10)) outputColumnNames: dec + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 75 Data size: 3472 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(dec), sum(dec) + Group By Vectorization: + aggregators: VectorUDAFAvgDecimal(col 0) -> struct, VectorUDAFSumDecimal(col 0) -> decimal(38,18) + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 0) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE @@ -574,9 +592,25 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: decimal(30,10)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/vector_decimal_round.q.out ql/src/test/results/clientpositive/vector_decimal_round.q.out index 1e76ba2..d778f63 100644 --- ql/src/test/results/clientpositive/vector_decimal_round.q.out +++ ql/src/test/results/clientpositive/vector_decimal_round.q.out @@ -28,12 +28,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_txt #### A masked pattern was here #### 101 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_txt order by dec PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dec, round(dec, -1) 
from decimal_tbl_txt order by dec POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -54,6 +58,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(11,0)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(10,0)), VALUE._col0 (type: decimal(11,0)) @@ -82,12 +94,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_txt #### A masked pattern was here #### 101 100 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_txt order by round(dec, -1) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_txt order by round(dec, -1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -108,6 +124,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(10,0)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) @@ -162,12 +186,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_rc #### A masked pattern was here #### 101 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_rc order by dec PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_rc order by dec POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -188,6 +216,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(11,0)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(10,0)), VALUE._col0 (type: decimal(11,0)) @@ -216,12 +252,16 @@ 
POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_rc #### A masked pattern was here #### 101 100 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_rc order by round(dec, -1) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_rc order by round(dec, -1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -242,6 +282,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(10,0)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) @@ -296,12 +344,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_orc #### A masked pattern was here #### 101 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_orc order by dec PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_orc order by dec POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -313,16 +365,41 @@ STAGE PLANS: TableScan alias: decimal_tbl_orc Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: dec (type: decimal(10,0)), round(dec, -1) (type: decimal(11,0)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 1:decimal(11,0) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(10,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(11,0)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false 
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(10,0)), VALUE._col0 (type: decimal(11,0)) @@ -351,12 +428,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_orc #### A masked pattern was here #### 101 100 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_orc order by round(dec, -1) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_orc order by round(dec, -1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -368,16 +449,40 @@ STAGE PLANS: TableScan alias: decimal_tbl_orc Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: dec (type: decimal(10,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: round(_col0, -1) (type: decimal(11,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) diff --git ql/src/test/results/clientpositive/vector_decimal_round_2.q.out ql/src/test/results/clientpositive/vector_decimal_round_2.q.out index be190c7..cc94b26 100644 --- ql/src/test/results/clientpositive/vector_decimal_round_2.q.out +++ ql/src/test/results/clientpositive/vector_decimal_round_2.q.out @@ -24,20 +24,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_1_orc #### A masked pattern was here #### 55555.000000000000000000 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3), round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4), round(dec, -5), round(dec, -6), round(dec, -7), round(dec, -8) FROM decimal_tbl_1_orc ORDER BY d PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION 
EXPRESSION SELECT round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3), round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4), round(dec, -5), round(dec, -6), round(dec, -7), round(dec, -8) FROM decimal_tbl_1_orc ORDER BY d POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -49,16 +53,41 @@ STAGE PLANS: TableScan alias: decimal_tbl_1_orc Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: round(dec) (type: decimal(21,0)), round(dec, 0) (type: decimal(21,0)), round(dec, 1) (type: decimal(22,1)), round(dec, 2) (type: decimal(23,2)), round(dec, 3) (type: decimal(24,3)), round(dec, -1) (type: decimal(21,0)), round(dec, -2) (type: decimal(21,0)), round(dec, -3) (type: decimal(21,0)), round(dec, -4) (type: decimal(21,0)), round(dec, -5) (type: decimal(21,0)), round(dec, -6) (type: decimal(21,0)), round(dec, -7) (type: decimal(21,0)), round(dec, -8) (type: decimal(21,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + selectExpressions: FuncRoundDecimalToDecimal(col 0) -> 1:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 3:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 4:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 5:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -5) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -6) -> 11:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -7) -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -8) -> 13:decimal(21,0) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), 
_col12 (type: decimal(21,0)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(22,1)), VALUE._col2 (type: decimal(23,2)), VALUE._col3 (type: decimal(24,3)), VALUE._col4 (type: decimal(21,0)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 (type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(21,0)) @@ -122,7 +151,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_2_orc #### A masked pattern was here #### 125.315000000000000000 -125.315000000000000000 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(pos) as p, round(pos, 0), round(pos, 1), round(pos, 2), round(pos, 3), round(pos, 4), @@ -132,7 +161,7 @@ SELECT round(neg, -1), round(neg, -2), round(neg, -3), round(neg, -4) FROM decimal_tbl_2_orc ORDER BY p PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(pos) as p, round(pos, 0), round(pos, 1), round(pos, 2), round(pos, 3), round(pos, 4), @@ -142,6 +171,10 @@ SELECT round(neg, -1), round(neg, -2), round(neg, -3), round(neg, -4) FROM decimal_tbl_2_orc ORDER BY p POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -153,16 +186,41 @@ STAGE PLANS: TableScan alias: decimal_tbl_2_orc Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: round(pos) (type: decimal(21,0)), round(pos, 0) (type: decimal(21,0)), round(pos, 1) (type: decimal(22,1)), round(pos, 2) (type: decimal(23,2)), round(pos, 3) (type: decimal(24,3)), round(pos, 4) (type: decimal(25,4)), round(pos, -1) (type: decimal(21,0)), round(pos, -2) (type: decimal(21,0)), round(pos, -3) (type: decimal(21,0)), round(pos, -4) (type: decimal(21,0)), round(neg) (type: decimal(21,0)), round(neg, 0) (type: decimal(21,0)), round(neg, 1) (type: decimal(22,1)), round(neg, 2) (type: decimal(23,2)), round(neg, 3) (type: decimal(24,3)), round(neg, 4) (type: decimal(25,4)), round(neg, -1) (type: decimal(21,0)), round(neg, -2) (type: decimal(21,0)), round(neg, -3) (type: decimal(21,0)), round(neg, -4) (type: decimal(21,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + selectExpressions: FuncRoundDecimalToDecimal(col 0) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 3:decimal(21,0), 
FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 4:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 5:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 6:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 4) -> 7:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 11:decimal(21,0), FuncRoundDecimalToDecimal(col 1) -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 0) -> 13:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 1) -> 14:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 2) -> 15:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 3) -> 16:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 4) -> 17:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -1) -> 18:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -2) -> 19:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -3) -> 20:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -4) -> 21:decimal(21,0) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(25,4)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(22,1)), _col13 (type: decimal(23,2)), _col14 (type: decimal(24,3)), _col15 (type: decimal(25,4)), _col16 (type: decimal(21,0)), _col17 (type: decimal(21,0)), _col18 (type: decimal(21,0)), _col19 (type: decimal(21,0)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(22,1)), VALUE._col2 (type: decimal(23,2)), VALUE._col3 (type: decimal(24,3)), VALUE._col4 (type: decimal(25,4)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 
(type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(22,1)), VALUE._col12 (type: decimal(23,2)), VALUE._col13 (type: decimal(24,3)), VALUE._col14 (type: decimal(25,4)), VALUE._col15 (type: decimal(21,0)), VALUE._col16 (type: decimal(21,0)), VALUE._col17 (type: decimal(21,0)), VALUE._col18 (type: decimal(21,0)) @@ -231,7 +289,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_3_orc #### A masked pattern was here #### 3.141592653589793000 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(dec, -15) as d, round(dec, -16), round(dec, -13), round(dec, -14), @@ -252,7 +310,7 @@ SELECT round(dec, 15), round(dec, 16) FROM decimal_tbl_3_orc ORDER BY d PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(dec, -15) as d, round(dec, -16), round(dec, -13), round(dec, -14), @@ -273,6 +331,10 @@ SELECT round(dec, 15), round(dec, 16) FROM decimal_tbl_3_orc ORDER BY d POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -284,16 +346,41 @@ STAGE PLANS: TableScan alias: decimal_tbl_3_orc Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: round(dec, -15) (type: decimal(21,0)), round(dec, -16) (type: decimal(21,0)), round(dec, -5) (type: decimal(21,0)), round(dec, -6) (type: decimal(21,0)), round(dec, -3) (type: decimal(21,0)), round(dec, -4) (type: decimal(21,0)), round(dec, -1) (type: decimal(21,0)), round(dec, -2) (type: decimal(21,0)), round(dec, 0) (type: decimal(21,0)), round(dec, 1) (type: decimal(22,1)), round(dec, 2) (type: decimal(23,2)), round(dec, 3) (type: decimal(24,3)), round(dec, -13) (type: decimal(21,0)), round(dec, 4) (type: decimal(25,4)), round(dec, 5) (type: decimal(26,5)), round(dec, 6) (type: decimal(27,6)), round(dec, 7) (type: decimal(28,7)), round(dec, 8) (type: decimal(29,8)), round(dec, 9) (type: decimal(30,9)), round(dec, 10) (type: decimal(31,10)), round(dec, 11) (type: decimal(32,11)), round(dec, 12) (type: decimal(33,12)), round(dec, 13) (type: decimal(34,13)), round(dec, -14) (type: decimal(21,0)), round(dec, 14) (type: decimal(35,14)), round(dec, 15) (type: decimal(36,15)), round(dec, 16) (type: decimal(37,16)), round(dec, -11) (type: decimal(21,0)), round(dec, -12) (type: decimal(21,0)), round(dec, -9) (type: decimal(21,0)), round(dec, -10) (type: decimal(21,0)), round(dec, -7) (type: decimal(21,0)), round(dec, -8) (type: decimal(21,0)) outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col2, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col3, _col31, _col32, _col33, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] + selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -15) -> 1:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -16) -> 2:decimal(21,0), 
FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -5) -> 3:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -6) -> 4:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 5:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 10:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 11:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 12:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -13) -> 13:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 4) -> 14:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 5) -> 15:decimal(26,5), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 6) -> 16:decimal(27,6), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 7) -> 17:decimal(28,7), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 8) -> 18:decimal(29,8), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 9) -> 19:decimal(30,9), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 10) -> 20:decimal(31,10), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 11) -> 21:decimal(32,11), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 12) -> 22:decimal(33,12), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 13) -> 23:decimal(34,13), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -14) -> 24:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 14) -> 25:decimal(35,14), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 15) -> 26:decimal(36,15), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 16) -> 27:decimal(37,16), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -11) -> 28:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -12) -> 29:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -9) -> 30:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -10) -> 31:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -7) -> 32:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -8) -> 33:decimal(21,0) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(21,0)), _col3 (type: decimal(21,0)), _col4 (type: decimal(21,0)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: 
decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)), _col13 (type: decimal(21,0)), _col14 (type: decimal(21,0)), _col15 (type: decimal(21,0)), _col16 (type: decimal(21,0)), _col17 (type: decimal(22,1)), _col18 (type: decimal(23,2)), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,4)), _col21 (type: decimal(26,5)), _col22 (type: decimal(27,6)), _col23 (type: decimal(28,7)), _col24 (type: decimal(29,8)), _col25 (type: decimal(30,9)), _col26 (type: decimal(31,10)), _col27 (type: decimal(32,11)), _col28 (type: decimal(33,12)), _col29 (type: decimal(34,13)), _col31 (type: decimal(35,14)), _col32 (type: decimal(36,15)), _col33 (type: decimal(37,16)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(21,0)), VALUE._col2 (type: decimal(21,0)), VALUE._col3 (type: decimal(21,0)), VALUE._col4 (type: decimal(21,0)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 (type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(21,0)), VALUE._col12 (type: decimal(21,0)), VALUE._col13 (type: decimal(21,0)), VALUE._col14 (type: decimal(21,0)), VALUE._col15 (type: decimal(21,0)), VALUE._col16 (type: decimal(22,1)), VALUE._col17 (type: decimal(23,2)), VALUE._col18 (type: decimal(24,3)), VALUE._col19 (type: decimal(25,4)), VALUE._col20 (type: decimal(26,5)), VALUE._col21 (type: decimal(27,6)), VALUE._col22 (type: decimal(28,7)), VALUE._col23 (type: decimal(29,8)), VALUE._col24 (type: decimal(30,9)), VALUE._col25 (type: decimal(31,10)), VALUE._col26 (type: decimal(32,11)), VALUE._col27 (type: decimal(33,12)), VALUE._col28 (type: decimal(34,13)), VALUE._col28 (type: decimal(34,13)), VALUE._col29 (type: decimal(35,14)), VALUE._col30 (type: decimal(36,15)), VALUE._col31 (type: decimal(37,16)) @@ -385,14 +472,18 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_4_orc #### A masked pattern was here #### 1809242.315111134400000000 -1809242.315111134400000000 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(pos, 9) as p, round(neg, 9), round(1809242.3151111344BD, 9), round(-1809242.3151111344BD, 9) FROM decimal_tbl_4_orc ORDER BY p PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(pos, 9) as p, round(neg, 9), round(1809242.3151111344BD, 9), round(-1809242.3151111344BD, 9) FROM decimal_tbl_4_orc ORDER BY p POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -404,16 +495,41 @@ STAGE PLANS: TableScan alias: decimal_tbl_4_orc Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + 
projectedOutputColumns: [0, 1] Select Operator expressions: round(pos, 9) (type: decimal(30,9)), round(neg, 9) (type: decimal(30,9)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3] + selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 9) -> 2:decimal(30,9), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 9) -> 3:decimal(30,9) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(30,9)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(30,9)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(30,9)), VALUE._col0 (type: decimal(30,9)), 1809242.315111134 (type: decimal(17,9)), -1809242.315111134 (type: decimal(17,9)) diff --git ql/src/test/results/clientpositive/vector_decimal_udf2.q.out ql/src/test/results/clientpositive/vector_decimal_udf2.q.out index 4e24fa6..cc801b8 100644 --- ql/src/test/results/clientpositive/vector_decimal_udf2.q.out +++ ql/src/test/results/clientpositive/vector_decimal_udf2.q.out @@ -48,14 +48,18 @@ POSTHOOK: Input: default@decimal_udf2_txt POSTHOOK: Output: default@decimal_udf2 POSTHOOK: Lineage: decimal_udf2.key SIMPLE [(decimal_udf2_txt)decimal_udf2_txt.FieldSchema(name:key, type:decimal(20,10), comment:null), ] POSTHOOK: Lineage: decimal_udf2.value SIMPLE [(decimal_udf2_txt)decimal_udf2_txt.FieldSchema(name:value, type:int, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2 WHERE key = 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2 WHERE key = 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -67,21 +71,44 @@ STAGE PLANS: TableScan alias: decimal_udf2 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColEqualDecimalScalar(col 0, val 10) -> 
boolean predicate: (key = 10) (type: boolean) Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: NaN (type: double), NaN (type: double), 1.4711276743037347 (type: double), -0.8390715290764524 (type: double), -0.5440211108893698 (type: double), 0.6483608274590866 (type: double), 0.17453292519943295 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8] + selectExpressions: ConstantVectorExpression(val NaN) -> 2:double, ConstantVectorExpression(val NaN) -> 3:double, ConstantVectorExpression(val 1.4711276743037347) -> 4:double, ConstantVectorExpression(val -0.8390715290764524) -> 5:double, ConstantVectorExpression(val -0.5440211108893698) -> 6:double, ConstantVectorExpression(val 0.6483608274590866) -> 7:double, ConstantVectorExpression(val 0.17453292519943295) -> 8:double Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -100,20 +127,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf2 #### A masked pattern was here #### NaN NaN 1.4711276743037347 -0.8390715290764524 -0.5440211108893698 0.6483608274590866 0.17453292519943295 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF2 WHERE key = 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF2 WHERE key = 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -125,21 +156,44 @@ STAGE PLANS: TableScan alias: decimal_udf2 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColEqualDecimalScalar(col 0, val 10) -> boolean predicate: (key = 10) (type: boolean) Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 +
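Because the filter pins key to the constant 10, the optimizer folds every deterministic math UDF in these select lists down to a ConstantVectorExpression, and the folded literals are exactly the java.lang.Math results for an argument of 10 (acos and asin yield NaN since their domain is [-1, 1]). A standalone check that reproduces each literal in the plans above; only the log calls that still reference the value column survive as runtime expressions, as the rest of this hunk shows:

    // Reproduces the constants the optimizer folded for key = 10.
    public class FoldedMathConstants {
      public static void main(String[] args) {
        System.out.println(Math.acos(10));      // NaN (argument outside [-1, 1])
        System.out.println(Math.atan(10));      // 1.4711276743037347
        System.out.println(Math.cos(10));       // -0.8390715290764524
        System.out.println(Math.sin(10));       // -0.5440211108893698
        System.out.println(Math.tan(10));       // 0.6483608274590866
        System.out.println(Math.toRadians(10)); // 0.17453292519943295
        System.out.println(Math.exp(10));       // 22026.465794806718
        System.out.println(Math.log(10));       // 2.302585092994046 (ln, log fold alike)
        System.out.println(Math.log10(10));     // 1.0
        System.out.println(Math.sqrt(10));      // 3.1622776601683795
      }
    }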
Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9] + selectExpressions: ConstantVectorExpression(val 22026.465794806718) -> 2:double, ConstantVectorExpression(val 2.302585092994046) -> 3:double, ConstantVectorExpression(val 2.302585092994046) -> 4:double, ConstantVectorExpression(val 1.0) -> 5:double, FuncLogWithBaseLongToDouble(col 1) -> 6:double, VectorUDFAdaptor(log(value, 10)) -> 7:double, ConstantVectorExpression(val 1.0) -> 8:double, ConstantVectorExpression(val 3.1622776601683795) -> 9:double Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_distinct_2.q.out ql/src/test/results/clientpositive/vector_distinct_2.q.out index 8fb5c50..9d1e253 100644 --- ql/src/test/results/clientpositive/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/vector_distinct_2.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select distinct s, t from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select distinct s, t from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -118,11 +122,24 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: t, s + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 8] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 8 + native: false + projectedOutputColumns: [] keys: t (type: tinyint), s (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -131,10 +148,31 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ Map-reduce partition 
columns: _col0 (type: tinyint), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/vector_elt.q.out ql/src/test/results/clientpositive/vector_elt.q.out index 08ca167..233255a 100644 --- ql/src/test/results/clientpositive/vector_elt.q.out +++ ql/src/test/results/clientpositive/vector_elt.q.out @@ -1,11 +1,15 @@ -PREHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc WHERE ctinyint > 0 LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc WHERE ctinyint > 0 LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -17,24 +21,50 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean predicate: (ctinyint > 0) (type: boolean) Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [13, 6, 2, 16] + selectExpressions: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 13:long, VectorElt(columns [14, 6, 15])(children: LongColAddLongScalar(col 12, val 1)(children: 
LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 14:long, col 6, CastLongToString(col 2) -> 15:String) -> 16:string Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -64,7 +94,7 @@ POSTHOOK: Input: default@alltypesorc 1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 2 cvLH6Eat2yFsyy7p 528534767 528534767 1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), elt('1', 'abc', 'defg'), @@ -77,7 +107,7 @@ SELECT elt(2, 'abc', 'defg'), elt(3, 'abc', 'defg') FROM alltypesorc LIMIT 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), elt('1', 'abc', 'defg'), @@ -90,25 +120,64 @@ SELECT elt(2, 'abc', 'defg'), elt(3, 'abc', 'defg') FROM alltypesorc LIMIT 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Select Operator + expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: string), null (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + selectExpressions: ConstantVectorExpression(val defg) -> 12:string, ConstantVectorExpression(val cc) -> 13:string, ConstantVectorExpression(val abc) -> 14:string, ConstantVectorExpression(val 2) -> 15:string, ConstantVectorExpression(val 12345) -> 16:string, ConstantVectorExpression(val 123456789012) -> 17:string, ConstantVectorExpression(val 1.25) -> 18:string, ConstantVectorExpression(val 16.0) -> 19:string, ConstantVectorExpression(val null) -> 20:string, ConstantVectorExpression(val null) -> 21:string + Statistics: Num rows: 12288 Data size: 8687784 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of 
rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: string), null (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 12288 Data size: 8687784 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), diff --git ql/src/test/results/clientpositive/vector_empty_where.q.out ql/src/test/results/clientpositive/vector_empty_where.q.out index ac7ee0c..97a0f6a 100644 --- ql/src/test/results/clientpositive/vector_empty_where.q.out +++ ql/src/test/results/clientpositive/vector_empty_where.q.out @@ -1,9 +1,13 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count (distinct cint) from alltypesorc where cstring1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count (distinct cint) from alltypesorc where cstring1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -15,15 +19,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 13)(children: CastLongToBooleanViaLongToLong(col 12)(children: StringLength(col 6) -> 12:Long) -> 13:long) -> boolean predicate: cstring1 (type: string) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT cint) + Group By Vectorization: + aggregators: 
VectorUDAFCount(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2 + native: false + projectedOutputColumns: [0] keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -31,11 +53,32 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col0:0._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE @@ -62,12 +105,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 6041 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count (distinct cint) from alltypesorc where cint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count (distinct cint) from alltypesorc where cint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -79,11 +126,25 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 12)(children: CastLongToBooleanViaLongToLong(col 2) -> 12:long) -> boolean predicate: cint (type: int) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT cint) + Group By Vectorization: + aggregators: VectorUDAFCount(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2 + native: false + projectedOutputColumns: [0] keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -91,11 +152,32 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, 
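In vector_empty_where.q.out the WHERE expressions are not boolean, so the vectorizer wraps an implicit cast (string length to boolean, long to boolean, double to boolean, timestamp to boolean) in SelectColumnIsTrue; the count(DISTINCT cint) is also what defeats the native reduce sink, surfacing as the failed condition "No DISTINCT columns IS false". A simplified sketch of the selected-array compaction such a filter performs, with plain arrays standing in for the real VectorizedRowBatch fields (not the actual Hive operator code):

    // Schematic of how a vectorized filter like SelectColumnIsTrue narrows a batch:
    // rather than copying rows, it rewrites the batch's "selected" index array in place.
    public final class SelectIsTrueSketch {
      /** Keeps rows whose boolean-as-long value is non-zero; returns the new size.
       *  If selectedInUse was false, the caller must set it to true afterwards. */
      static int filterIsTrue(long[] boolCol, int[] selected, int size, boolean selectedInUse) {
        int newSize = 0;
        if (selectedInUse) {
          for (int j = 0; j < size; j++) {
            int i = selected[j];
            if (boolCol[i] != 0) {
              selected[newSize++] = i;
            }
          }
        } else {
          for (int i = 0; i < size; i++) {
            if (boolCol[i] != 0) {
              selected[newSize++] = i;
            }
          }
        }
        return newSize;
      }
    }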
LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col0:0._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE @@ -122,12 +204,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 6082 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count (distinct cint) from alltypesorc where cfloat PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count (distinct cint) from alltypesorc where cfloat POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -139,15 +225,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 12)(children: CastDoubleToBooleanViaDoubleToLong(col 4) -> 12:long) -> boolean predicate: cfloat (type: float) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT cint) + Group By Vectorization: + aggregators: VectorUDAFCount(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2 + native: false + projectedOutputColumns: [0] keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -155,11 +259,32 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col0:0._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE @@ -186,12 +311,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 3022 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count (distinct cint) from alltypesorc where ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count (distinct cint) from alltypesorc where ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -203,15 +332,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 12)(children: CastTimestampToBoolean(col 8) -> 12:long) -> boolean predicate: ctimestamp1 (type: timestamp) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT cint) + Group By Vectorization: + aggregators: VectorUDAFCount(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2 + native: false + projectedOutputColumns: [0] keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -219,11 +366,32 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col0:0._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/vector_groupby4.q.out ql/src/test/results/clientpositive/vector_groupby4.q.out index cf65d7a..cfcc584 100644 --- ql/src/test/results/clientpositive/vector_groupby4.q.out +++ ql/src/test/results/clientpositive/vector_groupby4.q.out @@ -18,14 +18,18 @@ POSTHOOK: query: CREATE TABLE dest1(c1 STRING) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -39,18 +43,47 @@ STAGE PLANS: TableScan alias: srcorc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: substr(key, 1, 1) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: StringSubstrColStartLen(col 0, start 0, length 1) -> 2:string Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0 @@ -71,8 +104,20 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE 
Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/vector_groupby6.q.out ql/src/test/results/clientpositive/vector_groupby6.q.out index ecb361c..62ab5b1 100644 --- ql/src/test/results/clientpositive/vector_groupby6.q.out +++ ql/src/test/results/clientpositive/vector_groupby6.q.out @@ -18,14 +18,18 @@ POSTHOOK: query: CREATE TABLE dest1(c1 STRING) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -39,18 +43,47 @@ STAGE PLANS: TableScan alias: srcorc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: substr(value, 5, 1) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: StringSubstrColStartLen(col 1, start 4, length 1) -> 2:string Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0 @@ -71,8 +104,20 @@ STAGE PLANS: 
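The vector_groupby4 and vector_groupby6 plans above compile substr via StringSubstrColStartLen(col 0, start 0, length 1): the SQL function is 1-based while the vector expression stores a 0-based start. The rand() shuffle is the skewed two-stage aggregation pattern, so stage 1 deduplicates within arbitrary partitions in mode partial1 and only stage 2, partitioned by the key itself, merges in mode final; the intermediate SequenceFile stage is the one whose map side cannot vectorize. A toy sketch of that partial1/final split, with plain collections standing in for the two GroupByOperator modes:

    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    // Stage 1 dedups rows spread by rand(); stage 2 merges partials by the key itself.
    public class TwoStageDistinctSketch {
      static Set<String> partial1(List<String> partitionRows) {
        return new HashSet<>(partitionRows);   // mode: partial1 (per-reducer dedup)
      }

      static Set<String> finalMerge(List<Set<String>> partials) {
        Set<String> out = new HashSet<>();     // mode: final (duplicate keys collide here)
        for (Set<String> p : partials) {
          out.addAll(p);
        }
        return out;
      }
    }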
sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/vector_groupby_3.q.out ql/src/test/results/clientpositive/vector_groupby_3.q.out index be37cac..2a46a2e 100644 --- ql/src/test/results/clientpositive/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/vector_groupby_3.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s, t, max(b) from vectortab2korc group by s, t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select s, t, max(b) from vectortab2korc group by s, t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -118,12 +122,26 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: t (type: tinyint), s (type: string), b (type: bigint) outputColumnNames: t, s, b + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 8, 3] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(b) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 8 + native: false + projectedOutputColumns: [0] keys: t (type: tinyint), s (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -132,12 +150,33 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic 
stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out index 7820831..feada86 100644 --- ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out @@ -1,18 +1,22 @@ Warning: Map Join MAPJOIN[34][bigTable=?] in task 'Stage-8:MAPRED' is a cross product -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from src where not key in (select key from src) order by key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from src where not key in (select key from src) order by key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-10 depends on stages: Stage-4 @@ -38,6 +42,10 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), count(key) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE @@ -45,9 +53,21 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), count(VALUE._col1) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE @@ -96,6 +116,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -141,6 +165,10 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -153,6 +181,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 366 Data size: 10110 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) @@ -217,6 +253,10 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: key (type: string) mode: hash outputColumnNames: _col0 @@ -226,8 +266,20 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/vector_groupby_reduce.q.out index 9e0abd4..d64993c 100644 --- ql/src/test/results/clientpositive/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/vector_groupby_reduce.q.out @@ -211,7 +211,7 @@ POSTHOOK: Lineage: store_sales.ss_sold_time_sk SIMPLE [(store_sales_txt)store_sa POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_store_sk, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_ticket_number SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ticket_number, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_wholesale_cost SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_wholesale_cost, type:float, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ss_ticket_number from @@ -219,7 +219,7 @@ from group by ss_ticket_number limit 20 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ss_ticket_number from @@ -227,6 +227,10 @@ from group by ss_ticket_number limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 
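
A pattern that repeats across these golden files: Reduce Vectorization reports enabled: false only because the tests run on MapReduce; the condition string hive.execution.engine mr IN [tez, spark] IS false records that reduce-side vectorization requires Tez or Spark. A hedged sketch of the settings that would flip it, assuming a Tez-enabled deployment (the query is the one from vector_groupby_3 above):

    SET hive.execution.engine=tez;
    SET hive.vectorized.execution.reduce.enabled=true;
    EXPLAIN VECTORIZATION EXPRESSION
    SELECT s, t, max(b) FROM vectortab2korc GROUP BY s, t;
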
@@ -238,11 +242,24 @@ STAGE PLANS: TableScan alias: store_sales Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Select Operator expressions: ss_ticket_number (type: int) outputColumnNames: ss_ticket_number + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [9] Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 9 + native: false + projectedOutputColumns: [] keys: ss_ticket_number (type: int) mode: hash outputColumnNames: _col0 @@ -251,11 +268,32 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 @@ -315,7 +353,7 @@ POSTHOOK: Input: default@store_sales 18 19 20 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select min(ss_ticket_number) m from @@ -327,7 +365,7 @@ from group by ss_ticket_number order by m PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select min(ss_ticket_number) m from @@ -339,6 +377,10 @@ from group by ss_ticket_number order by m POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -351,11 +393,24 @@ STAGE PLANS: TableScan alias: store_sales Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Select Operator expressions: ss_ticket_number (type: int) outputColumnNames: ss_ticket_number + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [9] Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 9 
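
In the first vector_groupby_reduce query above, the limit 20 is visible in the vectorization report: the reduce sink's nativeConditionsNotMet includes No TopN IS false (alongside the usual MR-engine and Uniform Hash conditions), so the TopN hash path keeps the operator on the non-native VectorReduceSinkOperator. The later queries in the same file, which sort instead of limiting, list No TopN IS true under nativeConditionsMet. For comparison, a sketch of the limit-free shape, assuming the same store_sales test table:

    EXPLAIN VECTORIZATION EXPRESSION
    SELECT ss_ticket_number
    FROM store_sales
    GROUP BY ss_ticket_number;
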
+ native: false + projectedOutputColumns: [] keys: ss_ticket_number (type: int) mode: hash outputColumnNames: _col0 @@ -364,16 +419,41 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(_col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 @@ -397,6 +477,14 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -524,7 +612,7 @@ POSTHOOK: Input: default@store_sales 80 81 82 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ss_ticket_number, sum(ss_item_sk), sum(q) from @@ -536,7 +624,7 @@ from group by ss_ticket_number order by ss_ticket_number PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ss_ticket_number, sum(ss_item_sk), sum(q) from @@ -548,6 +636,10 @@ from group by ss_ticket_number order by ss_ticket_number POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -560,12 +652,26 @@ STAGE PLANS: TableScan alias: store_sales Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Select Operator expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int) outputColumnNames: ss_ticket_number, ss_item_sk, 
ss_quantity + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [9, 2, 10] Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ss_quantity) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 10) -> int + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 9, col 2 + native: false + projectedOutputColumns: [0] keys: ss_ticket_number (type: int), ss_item_sk (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -574,12 +680,33 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -590,6 +717,10 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), sum(_col2) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: _col1 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2 @@ -610,6 +741,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) @@ -737,7 +876,7 @@ POSTHOOK: Input: default@store_sales 80 151471 704 81 105109 429 82 55611 254 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ss_ticket_number, ss_item_sk, sum(q) from @@ -749,7 +888,7 @@ from group by ss_ticket_number, ss_item_sk order by ss_ticket_number, ss_item_sk PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ss_ticket_number, 
ss_item_sk, sum(q) from @@ -761,6 +900,10 @@ from group by ss_ticket_number, ss_item_sk order by ss_ticket_number, ss_item_sk POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -773,12 +916,26 @@ STAGE PLANS: TableScan alias: store_sales Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Select Operator expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int) outputColumnNames: ss_ticket_number, ss_item_sk, ss_quantity + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [9, 2, 10] Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ss_quantity) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 10) -> int + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 9, col 2 + native: false + projectedOutputColumns: [0] keys: ss_ticket_number (type: int), ss_item_sk (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -787,12 +944,33 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -803,6 +981,10 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: _col1 (type: int), _col0 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2 @@ -823,6 +1005,14 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: 
org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint) diff --git ql/src/test/results/clientpositive/vector_grouping_sets.q.out ql/src/test/results/clientpositive/vector_grouping_sets.q.out index 58d1f87..94bf387 100644 --- ql/src/test/results/clientpositive/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/vector_grouping_sets.q.out @@ -123,16 +123,20 @@ POSTHOOK: Lineage: store.s_street_type SIMPLE [(store_txt)store_txt.FieldSchema( POSTHOOK: Lineage: store.s_suite_number SIMPLE [(store_txt)store_txt.FieldSchema(name:s_suite_number, type:string, comment:null), ] POSTHOOK: Lineage: store.s_tax_precentage SIMPLE [(store_txt)store_txt.FieldSchema(name:s_tax_precentage, type:decimal(5,2), comment:null), ] POSTHOOK: Lineage: store.s_zip SIMPLE [(store_txt)store_txt.FieldSchema(name:s_zip, type:string, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s_store_id from store group by s_store_id with rollup PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select s_store_id from store group by s_store_id with rollup POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -149,6 +153,10 @@ STAGE PLANS: outputColumnNames: s_store_id Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: s_store_id (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -158,8 +166,22 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: GROUPBY operator: Grouping sets not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0 @@ -198,16 +220,20 @@ AAAAAAAAEAAAAAAA AAAAAAAAHAAAAAAA AAAAAAAAIAAAAAAA AAAAAAAAKAAAAAAA -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s_store_id, GROUPING__ID from store group by s_store_id with rollup PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select s_store_id, GROUPING__ID from store group by s_store_id with rollup POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -224,6 +250,10 
@@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: _col0 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -233,8 +263,22 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: GROUPBY operator: Grouping sets not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/vector_if_expr.q.out ql/src/test/results/clientpositive/vector_if_expr.q.out index 0a82be9..73cbcdf 100644 --- ql/src/test/results/clientpositive/vector_if_expr.q.out +++ ql/src/test/results/clientpositive/vector_if_expr.q.out @@ -1,9 +1,13 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, IF (cboolean1, 'first', 'second') FROM alltypesorc WHERE cboolean1 IS NOT NULL AND cboolean1 ORDER BY cboolean1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, IF (cboolean1, 'first', 'second') FROM alltypesorc WHERE cboolean1 IS NOT NULL AND cboolean1 ORDER BY cboolean1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -15,19 +19,48 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsTrue(col 10) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean predicate: (cboolean1 and cboolean1 is not null) (type: boolean) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), if(cboolean1, 'first', 'second') (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 12] + selectExpressions: IfExprStringScalarStringScalar(col 10, val first, val second) -> 12:String Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns 
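
vector_grouping_sets is the first file in this patch to capture a notVectorizedReason: the ORC input format satisfies the map-side enabledConditionsMet, but the rollup triggers GROUPBY operator: Grouping sets not supported and the whole map task falls back to row mode (vectorized: false). A sketch of the triggering shape, taken from the test and assuming its store table:

    EXPLAIN VECTORIZATION EXPRESSION
    SELECT s_store_id
    FROM store
    GROUP BY s_store_id WITH ROLLUP;
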
IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: string) diff --git ql/src/test/results/clientpositive/vector_include_no_sel.q.out ql/src/test/results/clientpositive/vector_include_no_sel.q.out index e2dcb5d..f24bd24 100644 --- ql/src/test/results/clientpositive/vector_include_no_sel.q.out +++ ql/src/test/results/clientpositive/vector_include_no_sel.q.out @@ -163,16 +163,20 @@ POSTHOOK: Lineage: customer_demographics.cd_gender SIMPLE [(customer_demographic POSTHOOK: Lineage: customer_demographics.cd_marital_status SIMPLE [(customer_demographics_txt)customer_demographics_txt.FieldSchema(name:cd_marital_status, type:string, comment:null), ] POSTHOOK: Lineage: customer_demographics.cd_purchase_estimate SIMPLE [(customer_demographics_txt)customer_demographics_txt.FieldSchema(name:cd_purchase_estimate, type:int, comment:null), ] Warning: Map Join MAPJOIN[15][bigTable=store_sales] in task 'Stage-2:MAPRED' is a cross product -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(1) from customer_demographics,store_sales where ((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'M') or (customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'U')) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(1) from customer_demographics,store_sales where ((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'M') or (customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'U')) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -201,34 +205,77 @@ STAGE PLANS: TableScan alias: store_sales Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Map Join Operator condition map: Inner Join 0 to 1 keys: 0 1 + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col2, _col16 
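
The vector_include_no_sel update is the one place in this section that annotates a map join: Map Join Vectorization lists the optimized-hashtable, single-condition, no-nullsafe and key-type checks under nativeConditionsMet, with only the MR engine under nativeConditionsNotMet, so the cross product still executes through the (non-native) VectorMapJoinOperator. The driving query, as it appears in the test:

    EXPLAIN VECTORIZATION EXPRESSION
    SELECT count(1)
    FROM customer_demographics, store_sales
    WHERE (customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk
           AND customer_demographics.cd_marital_status = 'M')
       OR (customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk
           AND customer_demographics.cd_marital_status = 'U');
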
Statistics: Num rows: 200000 Data size: 92055200 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColEqualLongColumn(col 0, col 2) -> boolean, FilterStringGroupColEqualStringScalar(col 1, val M) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColEqualLongColumn(col 0, col 2) -> boolean, FilterStringGroupColEqualStringScalar(col 1, val U) -> boolean) -> boolean) -> boolean predicate: (((_col0 = _col16) and (_col2 = 'M')) or ((_col0 = _col16) and (_col2 = 'U'))) (type: boolean) Statistics: Num rows: 100000 Data size: 46027600 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 100000 Data size: 46027600 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 3:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.vectorized.execution.reducesink.new.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/vector_interval_1.q.out ql/src/test/results/clientpositive/vector_interval_1.q.out index 38e2ca0..aca8f6c 100644 --- ql/src/test/results/clientpositive/vector_interval_1.q.out +++ ql/src/test/results/clientpositive/vector_interval_1.q.out @@ -38,20 +38,24 @@ POSTHOOK: Lineage: vector_interval_1.dt EXPRESSION [] POSTHOOK: Lineage: vector_interval_1.str1 EXPRESSION [] POSTHOOK: Lineage: vector_interval_1.str2 EXPRESSION [] POSTHOOK: Lineage: vector_interval_1.ts EXPRESSION [] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select str1, interval '1-2' year to month, interval_year_month(str1), interval '1 2:3:4' day to second, interval_day_time(str2) from vector_interval_1 order by str1 PREHOOK: type: QUERY -POSTHOOK: query: explain 
+POSTHOOK: query: explain vectorization expression select str1, interval '1-2' year to month, interval_year_month(str1), interval '1 2:3:4' day to second, interval_day_time(str2) from vector_interval_1 order by str1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -63,16 +67,41 @@ STAGE PLANS: TableScan alias: vector_interval_1 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: str1 (type: string), CAST( str1 AS INTERVAL YEAR TO MONTH) (type: interval_year_month), CAST( str2 AS INTERVAL DAY TO SECOND) (type: interval_day_time) outputColumnNames: _col0, _col2, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 4, 5] + selectExpressions: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month, CastStringToIntervalDayTime(col 3) -> 5:interval_day_time Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: interval_year_month), _col4 (type: interval_day_time) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), 1-2 (type: interval_year_month), VALUE._col0 (type: interval_year_month), 1 02:03:04.000000000 (type: interval_day_time), VALUE._col1 (type: interval_day_time) @@ -110,7 +139,7 @@ POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### NULL 1-2 NULL 1 02:03:04.000000000 NULL 1-2 1-2 1-2 1 02:03:04.000000000 1 02:03:04.000000000 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dt, interval '1-2' year to month + interval '1-2' year to month, @@ -121,7 +150,7 @@ select interval '1-2' year to month - interval_year_month(str1) from vector_interval_1 order by dt PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dt, interval '1-2' year to month + interval '1-2' year to month, @@ -132,6 +161,10 @@ select interval '1-2' year to month - interval_year_month(str1) from vector_interval_1 order by dt POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 
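
vector_interval_1's first plan documents the UDF-to-vector-expression mapping for interval parsing: interval_year_month(str1) compiles to CastStringToIntervalYearMonth and interval_day_time(str2) to CastStringToIntervalDayTime, with the interval literals already constant-folded to 1-2 and 1 02:03:04.000000000 on the reduce side. A minimal probe using literals only, assuming a Hive build that accepts a FROM-less SELECT:

    SELECT INTERVAL '1-2' YEAR TO MONTH,
           INTERVAL '1 2:3:4' DAY TO SECOND;
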
depends on stages: Stage-1 @@ -143,16 +176,41 @@ STAGE PLANS: TableScan alias: vector_interval_1 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: dt (type: date), (CAST( str1 AS INTERVAL YEAR TO MONTH) + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (1-2 + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (CAST( str1 AS INTERVAL YEAR TO MONTH) - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (1-2 - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month) outputColumnNames: _col0, _col2, _col3, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 6, 5, 8, 7] + selectExpressions: IntervalYearMonthColAddIntervalYearMonthColumn(col 4, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:long, IntervalYearMonthScalarAddIntervalYearMonthColumn(val 14, col 4)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month) -> 5:long, IntervalYearMonthColSubtractIntervalYearMonthColumn(col 4, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 8:long, IntervalYearMonthScalarSubtractIntervalYearMonthColumn(val 14, col 4)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month) -> 7:long Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: interval_year_month), _col3 (type: interval_year_month), _col5 (type: interval_year_month), _col6 (type: interval_year_month) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), 2-4 (type: interval_year_month), VALUE._col0 (type: interval_year_month), VALUE._col1 (type: interval_year_month), 0-0 (type: interval_year_month), VALUE._col2 (type: interval_year_month), VALUE._col3 (type: interval_year_month) @@ -198,7 +256,7 @@ POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### NULL 2-4 NULL NULL 0-0 NULL NULL 2001-01-01 2-4 2-4 2-4 0-0 0-0 0-0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dt, interval '1 2:3:4' day to second + interval '1 2:3:4' day to second, @@ -209,7 
+267,7 @@ select interval '1 2:3:4' day to second - interval_day_time(str2) from vector_interval_1 order by dt PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dt, interval '1 2:3:4' day to second + interval '1 2:3:4' day to second, @@ -220,6 +278,10 @@ select interval '1 2:3:4' day to second - interval_day_time(str2) from vector_interval_1 order by dt POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -231,16 +293,41 @@ STAGE PLANS: TableScan alias: vector_interval_1 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: dt (type: date), (CAST( str2 AS INTERVAL DAY TO SECOND) + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (CAST( str2 AS INTERVAL DAY TO SECOND) - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time) outputColumnNames: _col0, _col2, _col3, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 6, 5, 8, 7] + selectExpressions: IntervalDayTimeColAddIntervalDayTimeColumn(col 4, col 5)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time, CastStringToIntervalDayTime(col 3) -> 5:interval_day_time) -> 6:interval_day_time, IntervalDayTimeScalarAddIntervalDayTimeColumn(val 1 02:03:04.000000000, col 4)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time) -> 5:timestamp, IntervalDayTimeColSubtractIntervalDayTimeColumn(col 4, col 7)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time, CastStringToIntervalDayTime(col 3) -> 7:interval_day_time) -> 8:interval_day_time, IntervalDayTimeScalarSubtractIntervalDayTimeColumn(val 1 02:03:04.000000000, col 4)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time) -> 7:timestamp Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type: interval_day_time) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce 
Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), 2 04:06:08.000000000 (type: interval_day_time), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), 0 00:00:00.000000000 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time) @@ -286,7 +373,7 @@ POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### NULL 2 04:06:08.000000000 NULL NULL 0 00:00:00.000000000 NULL NULL 2001-01-01 2 04:06:08.000000000 2 04:06:08.000000000 2 04:06:08.000000000 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dt, dt + interval '1-2' year to month, @@ -303,7 +390,7 @@ select dt - interval_day_time(str2) from vector_interval_1 order by dt PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dt, dt + interval '1-2' year to month, @@ -320,6 +407,10 @@ select dt - interval_day_time(str2) from vector_interval_1 order by dt POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -331,16 +422,41 @@ STAGE PLANS: TableScan alias: vector_interval_1 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: dt (type: date), (dt + 1-2) (type: date), (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (1-2 + dt) (type: date), (CAST( str1 AS INTERVAL YEAR TO MONTH) + dt) (type: date), (dt - 1-2) (type: date), (dt - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (dt + 1 02:03:04.000000000) (type: timestamp), (dt + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1 02:03:04.000000000 + dt) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO SECOND) + dt) (type: timestamp), (dt - 1 02:03:04.000000000) (type: timestamp), (dt - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 4, 6, 5, 8, 7, 10, 11, 13, 14, 15, 16, 17] + selectExpressions: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 4:long, DateColAddIntervalYearMonthColumn(col 1, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:long, IntervalYearMonthScalarAddDateColumn(val 1-2, col 1) -> 5:long, IntervalYearMonthColAddDateColumn(col 7, col 1)(children: CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 8:long, DateColSubtractIntervalYearMonthScalar(col 1, val 1-2) -> 7:long, DateColSubtractIntervalYearMonthColumn(col 1, col 9)(children: CastStringToIntervalYearMonth(col 2) -> 9:interval_year_month) -> 10:long, DateColAddIntervalDayTimeScalar(col 1, val 1 02:03:04.000000000) -> 11:timestamp, DateColAddIntervalDayTimeColumn(col 1, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 13:timestamp, IntervalDayTimeScalarAddDateColumn(val 1 02:03:04.000000000, col 1) -> 14:timestamp, IntervalDayTimeColAddDateColumn(col 12, col 1)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 15:interval_day_time, DateColSubtractIntervalDayTimeScalar(col 1, val 1 02:03:04.000000000) -> 16:timestamp, 
DateColSubtractIntervalDayTimeColumn(col 1, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 17:timestamp Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: date), _col4 (type: date), _col5 (type: date), _col6 (type: date), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: date), VALUE._col2 (type: date), VALUE._col3 (type: date), VALUE._col4 (type: date), VALUE._col5 (type: date), VALUE._col6 (type: timestamp), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp) @@ -398,7 +514,7 @@ POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 2001-01-01 2002-03-01 2002-03-01 2002-03-01 2002-03-01 1999-11-01 1999-11-01 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2000-12-30 21:56:56 2000-12-30 21:56:56 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ts, ts + interval '1-2' year to month, @@ -415,7 +531,7 @@ select ts - interval_day_time(str2) from vector_interval_1 order by ts PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ts, ts + interval '1-2' year to month, @@ -432,6 +548,10 @@ select ts - interval_day_time(str2) from vector_interval_1 order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -443,16 +563,41 @@ STAGE PLANS: TableScan alias: vector_interval_1 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: ts (type: timestamp), (ts + 1-2) (type: timestamp), (ts + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: timestamp), (1-2 + ts) (type: timestamp), (CAST( str1 AS INTERVAL YEAR TO MONTH) + ts) (type: timestamp), (ts - 1-2) (type: timestamp), (ts - CAST( str1 
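
The dt arithmetic plan just above makes the result-type promotion explicit: adding a year-to-month interval to a date stays a date (DateColAddIntervalYearMonthScalar -> 4:long, rendered as date), while adding a day-to-second interval widens to timestamp (DateColAddIntervalDayTimeScalar -> 11:timestamp). Condensed for illustration from the test's longer select list, assuming the same vector_interval_1 table:

    EXPLAIN VECTORIZATION EXPRESSION
    SELECT dt,
           dt + INTERVAL '1-2' YEAR TO MONTH,      -- plan reports type: date
           dt + INTERVAL '1 2:3:4' DAY TO SECOND   -- plan reports type: timestamp
    FROM vector_interval_1 ORDER BY dt;
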
AS INTERVAL YEAR TO MONTH)) (type: timestamp), (ts + 1 02:03:04.000000000) (type: timestamp), (ts + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1 02:03:04.000000000 + ts) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO SECOND) + ts) (type: timestamp), (ts - 1 02:03:04.000000000) (type: timestamp), (ts - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17] + selectExpressions: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 4:timestamp, TimestampColAddIntervalYearMonthColumn(col 0, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:timestamp, IntervalYearMonthScalarAddTimestampColumn(val 1-2, col 0) -> 7:timestamp, IntervalYearMonthColAddTimestampColumn(col 5, col 0)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 8:timestamp, TimestampColSubtractIntervalYearMonthScalar(col 0, val 1-2) -> 9:timestamp, TimestampColSubtractIntervalYearMonthColumn(col 0, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 10:timestamp, TimestampColAddIntervalDayTimeScalar(col 0, val 1 02:03:04.000000000) -> 11:timestamp, TimestampColAddIntervalDayTimeColumn(col 0, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 13:timestamp, IntervalDayTimeScalarAddTimestampColumn(val 1 02:03:04.000000000, col 0) -> 14:timestamp, IntervalDayTimeColAddTimestampColumn(col 12, col 0)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 15:timestamp, TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 02:03:04.000000000) -> 16:timestamp, TimestampColSubtractIntervalDayTimeColumn(col 0, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 17:timestamp Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: 
KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp) @@ -510,7 +655,7 @@ POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 2001-01-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 1999-11-01 01:02:03 1999-11-01 01:02:03 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2000-12-30 22:58:59 2000-12-30 22:58:59 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ts, ts - ts, @@ -518,7 +663,7 @@ select ts - timestamp '2001-01-01 01:02:03' from vector_interval_1 order by ts PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ts, ts - ts, @@ -526,6 +671,10 @@ select ts - timestamp '2001-01-01 01:02:03' from vector_interval_1 order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -537,16 +686,41 @@ STAGE PLANS: TableScan alias: vector_interval_1 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: ts (type: timestamp), (ts - ts) (type: interval_day_time), (2001-01-01 01:02:03.0 - ts) (type: interval_day_time), (ts - 2001-01-01 01:02:03.0) (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 4, 5, 6] + selectExpressions: TimestampColSubtractTimestampColumn(col 0, col 0) -> 4:interval_day_time, TimestampScalarSubtractTimestampColumn(val 2001-01-01 01:02:03.0, col 0) -> 5:timestamp, TimestampColSubtractTimestampScalar(col 0, val 2001-01-01 01:02:03.0) -> 6:interval_day_time Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: 
hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time) @@ -586,7 +760,7 @@ POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### NULL NULL NULL NULL 2001-01-01 01:02:03 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dt, dt - dt, @@ -594,7 +768,7 @@ select dt - date '2001-01-01' from vector_interval_1 order by dt PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dt, dt - dt, @@ -602,6 +776,10 @@ select dt - date '2001-01-01' from vector_interval_1 order by dt POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -613,16 +791,41 @@ STAGE PLANS: TableScan alias: vector_interval_1 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: dt (type: date), (dt - dt) (type: interval_day_time), (2001-01-01 - dt) (type: interval_day_time), (dt - 2001-01-01) (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 4, 5, 6] + selectExpressions: DateColSubtractDateColumn(col 1, col 1) -> 4:timestamp, DateScalarSubtractDateColumn(val 2001-01-01 00:00:00.0, col 1) -> 5:timestamp, DateColSubtractDateScalar(col 1, val 2001-01-01 00:00:00.0) -> 6:timestamp Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time) @@ -662,7 +865,7 @@ POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### NULL NULL NULL NULL 2001-01-01 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 -PREHOOK: query: explain +PREHOOK: 
query: explain vectorization expression select dt, ts - dt, @@ -673,7 +876,7 @@ select date '2001-01-01' - ts from vector_interval_1 order by dt PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dt, ts - dt, @@ -684,6 +887,10 @@ select date '2001-01-01' - ts from vector_interval_1 order by dt POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -695,16 +902,41 @@ STAGE PLANS: TableScan alias: vector_interval_1 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: dt (type: date), (ts - dt) (type: interval_day_time), (2001-01-01 01:02:03.0 - dt) (type: interval_day_time), (ts - 2001-01-01) (type: interval_day_time), (dt - ts) (type: interval_day_time), (dt - 2001-01-01 01:02:03.0) (type: interval_day_time), (2001-01-01 - ts) (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 4, 5, 6, 7, 8, 9] + selectExpressions: TimestampColSubtractDateColumn(col 0, col 1) -> 4:interval_day_time, TimestampScalarSubtractDateColumn(val 2001-01-01 01:02:03.0, col 1) -> 5:interval_day_time, TimestampColSubtractDateScalar(col 0, val 2001-01-01 00:00:00.0) -> 6:interval_day_time, DateColSubtractTimestampColumn(col 1, col 0) -> 7:interval_day_time, DateColSubtractTimestampScalar(col 1, val 2001-01-01 01:02:03.0) -> 8:interval_day_time, DateScalarSubtractTimestampColumn(val 2001-01-01 00:00:00.0, col 0) -> 9:interval_day_time Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col4 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type: interval_day_time) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time), VALUE._col4 (type: interval_day_time), VALUE._col5 (type: interval_day_time) diff --git 
ql/src/test/results/clientpositive/vector_interval_arithmetic.q.out ql/src/test/results/clientpositive/vector_interval_arithmetic.q.out index dd4b7d1..cc37286 100644 --- ql/src/test/results/clientpositive/vector_interval_arithmetic.q.out +++ ql/src/test/results/clientpositive/vector_interval_arithmetic.q.out @@ -35,7 +35,7 @@ POSTHOOK: Output: default@interval_arithmetic_1 POSTHOOK: Lineage: interval_arithmetic_1.dateval EXPRESSION [(unique_timestamps)unique_timestamps.FieldSchema(name:tsval, type:timestamp, comment:null), ] POSTHOOK: Lineage: interval_arithmetic_1.tsval SIMPLE [(unique_timestamps)unique_timestamps.FieldSchema(name:tsval, type:timestamp, comment:null), ] tsval tsval -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dateval, dateval - interval '2-2' year to month, @@ -47,7 +47,7 @@ select from interval_arithmetic_1 order by dateval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dateval, dateval - interval '2-2' year to month, @@ -60,6 +60,10 @@ from interval_arithmetic_1 order by dateval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -71,16 +75,41 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: dateval (type: date), (dateval - 2-2) (type: date), (dateval - -2-2) (type: date), (dateval + 2-2) (type: date), (dateval + -2-2) (type: date), (-2-2 + dateval) (type: date), (2-2 + dateval) (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 3, 4, 5, 6, 7] + selectExpressions: DateColSubtractIntervalYearMonthScalar(col 0, val 2-2) -> 2:long, DateColSubtractIntervalYearMonthScalar(col 0, val -2-2) -> 3:long, DateColAddIntervalYearMonthScalar(col 0, val 2-2) -> 4:long, DateColAddIntervalYearMonthScalar(col 0, val -2-2) -> 5:long, IntervalYearMonthScalarAddDateColumn(val -2-2, col 0) -> 6:long, IntervalYearMonthScalarAddDateColumn(val 2-2, col 0) -> 7:long Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: date), _col4 (type: date), _col5 (type: date), _col6 (type: date) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: date), VALUE._col2 (type: date), VALUE._col3 (type: date), VALUE._col4 (type: date), VALUE._col5 (type: date) @@ -177,7 +206,7 @@ dateval _c1 _c2 _c3 _c4 _c5 _c6 9075-06-13 9073-04-13 9077-08-13 9077-08-13 9073-04-13 9073-04-13 9077-08-13 9209-11-11 9207-09-11 9212-01-11 9212-01-11 9207-09-11 9207-09-11 9212-01-11 9403-01-09 9400-11-09 9405-03-09 9405-03-09 9400-11-09 9400-11-09 9405-03-09 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dateval, dateval - date '1999-06-07', @@ -186,7 +215,7 @@ select from interval_arithmetic_1 order by dateval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dateval, dateval - date '1999-06-07', @@ -196,6 +225,10 @@ from interval_arithmetic_1 order by dateval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -207,16 +240,41 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: dateval (type: date), (dateval - 1999-06-07) (type: interval_day_time), (1999-06-07 - dateval) (type: interval_day_time), (dateval - dateval) (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 3, 4] + selectExpressions: DateColSubtractDateScalar(col 0, val 1999-06-07 00:00:00.0) -> 2:timestamp, DateScalarSubtractDateColumn(val 1999-06-07 00:00:00.0, col 0) -> 3:timestamp, DateColSubtractDateColumn(col 0, col 0) -> 4:timestamp Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), 
VALUE._col2 (type: interval_day_time) @@ -307,7 +365,7 @@ dateval _c1 _c2 _c3 9075-06-13 2584462 00:00:00.000000000 -2584462 00:00:00.000000000 0 00:00:00.000000000 9209-11-11 2633556 01:00:00.000000000 -2633556 01:00:00.000000000 0 00:00:00.000000000 9403-01-09 2704106 01:00:00.000000000 -2704106 01:00:00.000000000 0 00:00:00.000000000 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tsval, tsval - interval '2-2' year to month, @@ -319,7 +377,7 @@ select from interval_arithmetic_1 order by tsval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tsval, tsval - interval '2-2' year to month, @@ -332,6 +390,10 @@ from interval_arithmetic_1 order by tsval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -343,16 +405,41 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: tsval (type: timestamp), (tsval - 2-2) (type: timestamp), (tsval - -2-2) (type: timestamp), (tsval + 2-2) (type: timestamp), (tsval + -2-2) (type: timestamp), (-2-2 + tsval) (type: timestamp), (2-2 + tsval) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7] + selectExpressions: TimestampColSubtractIntervalYearMonthScalar(col 1, val 2-2) -> 2:timestamp, TimestampColSubtractIntervalYearMonthScalar(col 1, val -2-2) -> 3:timestamp, TimestampColAddIntervalYearMonthScalar(col 1, val 2-2) -> 4:timestamp, TimestampColAddIntervalYearMonthScalar(col 1, val -2-2) -> 5:timestamp, IntervalYearMonthScalarAddTimestampColumn(val -2-2, col 1) -> 6:timestamp, IntervalYearMonthScalarAddTimestampColumn(val 2-2, col 1) -> 7:timestamp Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: 
timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp) @@ -449,7 +536,7 @@ tsval _c1 _c2 _c3 _c4 _c5 _c6 9075-06-13 16:20:09.218517797 9073-04-13 16:20:09.218517797 9077-08-13 16:20:09.218517797 9077-08-13 16:20:09.218517797 9073-04-13 16:20:09.218517797 9073-04-13 16:20:09.218517797 9077-08-13 16:20:09.218517797 9209-11-11 04:08:58.223768453 9207-09-11 05:08:58.223768453 9212-01-11 04:08:58.223768453 9212-01-11 04:08:58.223768453 9207-09-11 05:08:58.223768453 9207-09-11 05:08:58.223768453 9212-01-11 04:08:58.223768453 9403-01-09 18:12:33.547 9400-11-09 18:12:33.547 9405-03-09 18:12:33.547 9405-03-09 18:12:33.547 9400-11-09 18:12:33.547 9400-11-09 18:12:33.547 9405-03-09 18:12:33.547 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select interval '2-2' year to month + interval '3-3' year to month, interval '2-2' year to month - interval '3-3' year to month @@ -457,7 +544,7 @@ from interval_arithmetic_1 order by interval '2-2' year to month + interval '3-3' year to month limit 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select interval '2-2' year to month + interval '3-3' year to month, interval '2-2' year to month - interval '3-3' year to month @@ -466,6 +553,10 @@ order by interval '2-2' year to month + interval '3-3' year to month limit 2 POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -477,14 +568,38 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 50 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: CAST( 5-5 AS INTERVAL YEAR TO MONTH) (type: interval_year_month) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 50 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: 5-5 (type: interval_year_month), -1-1 (type: interval_year_month) @@ -528,7 +643,7 @@ POSTHOOK: Input: default@interval_arithmetic_1 _c0 _c1 5-5 -1-1 5-5 -1-1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization 
expression select dateval, dateval - interval '99 11:22:33.123456789' day to second, @@ -540,7 +655,7 @@ select from interval_arithmetic_1 order by dateval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dateval, dateval - interval '99 11:22:33.123456789' day to second, @@ -553,6 +668,10 @@ from interval_arithmetic_1 order by dateval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -564,16 +683,41 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: dateval (type: date), (dateval - 99 11:22:33.123456789) (type: timestamp), (dateval - -99 11:22:33.123456789) (type: timestamp), (dateval + 99 11:22:33.123456789) (type: timestamp), (dateval + -99 11:22:33.123456789) (type: timestamp), (-99 11:22:33.123456789 + dateval) (type: timestamp), (99 11:22:33.123456789 + dateval) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 3, 4, 5, 6, 7] + selectExpressions: DateColSubtractIntervalDayTimeScalar(col 0, val 99 11:22:33.123456789) -> 2:timestamp, DateColSubtractIntervalDayTimeScalar(col 0, val -99 11:22:33.123456789) -> 3:timestamp, DateColAddIntervalDayTimeScalar(col 0, val 99 11:22:33.123456789) -> 4:timestamp, DateColAddIntervalDayTimeScalar(col 0, val -99 11:22:33.123456789) -> 5:timestamp, IntervalDayTimeScalarAddDateColumn(val -99 11:22:33.123456789, col 0) -> 6:timestamp, IntervalDayTimeScalarAddDateColumn(val 99 11:22:33.123456789, col 0) -> 7:timestamp Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp) @@ -670,7 +814,7 @@ 
dateval _c1 _c2 _c3 _c4 _c5 _c6 9075-06-13 9075-03-05 11:37:26.876543211 9075-09-20 11:22:33.123456789 9075-09-20 11:22:33.123456789 9075-03-05 11:37:26.876543211 9075-03-05 11:37:26.876543211 9075-09-20 11:22:33.123456789 9209-11-11 9209-08-03 13:37:26.876543211 9210-02-18 11:22:33.123456789 9210-02-18 11:22:33.123456789 9209-08-03 13:37:26.876543211 9209-08-03 13:37:26.876543211 9210-02-18 11:22:33.123456789 9403-01-09 9402-10-01 13:37:26.876543211 9403-04-18 12:22:33.123456789 9403-04-18 12:22:33.123456789 9402-10-01 13:37:26.876543211 9402-10-01 13:37:26.876543211 9403-04-18 12:22:33.123456789 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dateval, tsval, @@ -680,7 +824,7 @@ select from interval_arithmetic_1 order by dateval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dateval, tsval, @@ -691,6 +835,10 @@ from interval_arithmetic_1 order by dateval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -702,16 +850,41 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: dateval (type: date), tsval (type: timestamp), (dateval - tsval) (type: interval_day_time), (tsval - dateval) (type: interval_day_time), (tsval - tsval) (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + selectExpressions: DateColSubtractTimestampColumn(col 0, col 1) -> 2:interval_day_time, TimestampColSubtractDateColumn(col 1, col 0) -> 3:interval_day_time, TimestampColSubtractTimestampColumn(col 1, col 1) -> 4:interval_day_time Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col4 (type: interval_day_time) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: timestamp), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time) 
@@ -804,7 +977,7 @@ dateval tsval _c2 _c3 _c4 9075-06-13 9075-06-13 16:20:09.218517797 -0 16:20:09.218517797 0 16:20:09.218517797 0 00:00:00.000000000 9209-11-11 9209-11-11 04:08:58.223768453 -0 04:08:58.223768453 0 04:08:58.223768453 0 00:00:00.000000000 9403-01-09 9403-01-09 18:12:33.547 -0 18:12:33.547000000 0 18:12:33.547000000 0 00:00:00.000000000 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tsval, tsval - interval '99 11:22:33.123456789' day to second, @@ -816,7 +989,7 @@ select from interval_arithmetic_1 order by tsval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tsval, tsval - interval '99 11:22:33.123456789' day to second, @@ -829,6 +1002,10 @@ from interval_arithmetic_1 order by tsval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -840,16 +1017,41 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: tsval (type: timestamp), (tsval - 99 11:22:33.123456789) (type: timestamp), (tsval - -99 11:22:33.123456789) (type: timestamp), (tsval + 99 11:22:33.123456789) (type: timestamp), (tsval + -99 11:22:33.123456789) (type: timestamp), (-99 11:22:33.123456789 + tsval) (type: timestamp), (99 11:22:33.123456789 + tsval) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7] + selectExpressions: TimestampColSubtractIntervalDayTimeScalar(col 1, val 99 11:22:33.123456789) -> 2:timestamp, TimestampColSubtractIntervalDayTimeScalar(col 1, val -99 11:22:33.123456789) -> 3:timestamp, TimestampColAddIntervalDayTimeScalar(col 1, val 99 11:22:33.123456789) -> 4:timestamp, TimestampColAddIntervalDayTimeScalar(col 1, val -99 11:22:33.123456789) -> 5:timestamp, IntervalDayTimeScalarAddTimestampColumn(val -99 11:22:33.123456789, col 1) -> 6:timestamp, IntervalDayTimeScalarAddTimestampColumn(val 99 11:22:33.123456789, col 1) -> 7:timestamp Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp) @@ -946,14 +1148,14 @@ tsval _c1 _c2 _c3 _c4 _c5 _c6 9075-06-13 16:20:09.218517797 9075-03-06 03:57:36.095061008 9075-09-21 03:42:42.341974586 9075-09-21 03:42:42.341974586 9075-03-06 03:57:36.095061008 9075-03-06 03:57:36.095061008 9075-09-21 03:42:42.341974586 9209-11-11 04:08:58.223768453 9209-08-03 17:46:25.100311664 9210-02-18 15:31:31.347225242 9210-02-18 15:31:31.347225242 9209-08-03 17:46:25.100311664 9209-08-03 17:46:25.100311664 9210-02-18 15:31:31.347225242 9403-01-09 18:12:33.547 9402-10-02 07:50:00.423543211 9403-04-19 06:35:06.670456789 9403-04-19 06:35:06.670456789 9402-10-02 07:50:00.423543211 9402-10-02 07:50:00.423543211 9403-04-19 06:35:06.670456789 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select interval '99 11:22:33.123456789' day to second + interval '10 9:8:7.123456789' day to second, interval '99 11:22:33.123456789' day to second - interval '10 9:8:7.123456789' day to second from interval_arithmetic_1 limit 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select interval '99 11:22:33.123456789' day to second + interval '10 9:8:7.123456789' day to second, interval '99 11:22:33.123456789' day to second - interval '10 9:8:7.123456789' day to second @@ -961,6 +1163,10 @@ from interval_arithmetic_1 limit 2 POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -972,21 +1178,43 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: 109 20:30:40.246913578 (type: interval_day_time), 89 02:14:26.000000000 (type: interval_day_time) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3] + selectExpressions: ConstantVectorExpression(val 109 20:30:40.246913578) -> 2:interval_day_time, ConstantVectorExpression(val 89 02:14:26.000000000) -> 3:interval_day_time Statistics: Num rows: 50 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: 
false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out index 13a1bac..d2a879d 100644 --- ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out +++ ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out @@ -136,7 +136,7 @@ POSTHOOK: Lineage: vectortab_b_1korc.si SIMPLE [(vectortab_b_1k)vectortab_b_1k.F POSTHOOK: Lineage: vectortab_b_1korc.t SIMPLE [(vectortab_b_1k)vectortab_b_1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab_b_1korc.ts SIMPLE [(vectortab_b_1k)vectortab_b_1k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab_b_1korc.ts2 SIMPLE [(vectortab_b_1k)vectortab_b_1k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select v1.s, v2.s, @@ -158,7 +158,7 @@ join on v1.intrvl1 = v2.intrvl2 and v1.s = v2.s PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select v1.s, v2.s, @@ -180,6 +180,10 @@ join on v1.intrvl1 = v2.intrvl2 and v1.s = v2.s POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -215,12 +219,24 @@ STAGE PLANS: TableScan alias: vectortab_a_1korc Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 14)(children: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp) -> boolean) -> boolean predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean) Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 14] + selectExpressions: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -228,20 +244,40 @@ STAGE PLANS: keys: 0 _col0 (type: string), _col1 (type: interval_day_time) 1 _col0 (type: string), _col1 (type: interval_day_time) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1100 Data size: 506290 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: string), _col1 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2 + 
Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1] Statistics: Num rows: 1100 Data size: 506290 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1100 Data size: 506290 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_join_part_col_char.q.out ql/src/test/results/clientpositive/vector_join_part_col_char.q.out index 6b964ed..16fdeeb 100644 --- ql/src/test/results/clientpositive/vector_join_part_col_char.q.out +++ ql/src/test/results/clientpositive/vector_join_part_col_char.q.out @@ -95,10 +95,14 @@ POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@char_tbl2 gpa=3 gpa=3.5 -PREHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) PREHOOK: type: QUERY -POSTHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -133,6 +137,13 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: char(50)) Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: int) + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Join Operator condition map: diff --git ql/src/test/results/clientpositive/vector_left_outer_join.q.out ql/src/test/results/clientpositive/vector_left_outer_join.q.out index 644b0dc..5e88346 100644 --- ql/src/test/results/clientpositive/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/vector_left_outer_join.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization select count(*) from (select c.ctinyint from alltypesorc c left outer join alltypesorc cd @@ -7,7 +7,7 @@ left outer join alltypesorc hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select count(*) from (select c.ctinyint from alltypesorc c left outer join alltypesorc cd @@ -16,6 +16,10 @@ left outer join alltypesorc hd on 
hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-8 is a root stage Stage-3 depends on stages: Stage-8 @@ -92,8 +96,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/vector_left_outer_join2.q.out index a1257ce..2b62ff5 100644 --- ql/src/test/results/clientpositive/vector_left_outer_join2.q.out +++ ql/src/test/results/clientpositive/vector_left_outer_join2.q.out @@ -76,12 +76,16 @@ POSTHOOK: Output: default@tjoin2 POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -165,12 +169,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -254,12 +262,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select 
tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -295,9 +307,16 @@ STAGE PLANS: TableScan alias: tjoin1 Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -308,20 +327,40 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -344,12 +383,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -385,9 +428,16 @@ STAGE PLANS: TableScan alias: tjoin1 Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + 
projectedOutputColumns: [0, 1, 2] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -398,20 +448,40 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -434,12 +504,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -475,9 +549,16 @@ STAGE PLANS: TableScan alias: tjoin1 Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -488,20 +569,40 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + 
native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -524,12 +625,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -565,9 +670,16 @@ STAGE PLANS: TableScan alias: tjoin1 Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -578,20 +690,40 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 
(type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out index eaaf3e9..bf21796 100644 --- ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out @@ -1,15 +1,19 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-12 depends on stages: Stage-1 @@ -34,6 +38,10 @@ STAGE PLANS: predicate: l_partkey is not null (type: boolean) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: l_partkey (type: int) mode: hash outputColumnNames: _col0 @@ -43,8 +51,20 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 @@ -97,6 +117,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe 
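(For reference: the PLAN VECTORIZATION, Map Vectorization, Reduce Vectorization, and per-operator *Vectorization blocks added throughout these golden files come from the extended EXPLAIN syntax the tests now exercise. Bare "explain vectorization" emits only the plan- and task-level summaries; the EXPRESSION form adds per-operator expression strings such as predicateExpression and selectExpressions, and the DETAIL form additionally includes the rowBatchContext. A minimal sketch of a session that would produce output like the plan above — illustrative only, assuming the lineitem table from this test and using only the configuration keys named in the surrounding condition lines:

    -- Illustrative sketch, not part of this patch; settings mirror the
    -- conditions reported in the golden output above.
    SET hive.vectorized.execution.enabled=true;         -- gates "PLAN VECTORIZATION: enabled"
    SET hive.vectorized.execution.reduce.enabled=true;  -- reduce side additionally requires tez/spark
    SET hive.vectorized.execution.mapjoin.native.enabled=true;

    EXPLAIN VECTORIZATION EXPRESSION
    select p.p_partkey, li.l_suppkey
    from (select distinct l_partkey as p_partkey from lineitem) p
    join lineitem li on p.p_partkey = li.l_partkey
    where li.l_linenumber = 1
      and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR');

On the MR engine used by these clientpositive runs, Reduce Vectorization still reports enabled: false because "hive.execution.engine mr IN [tez, spark] IS false", and text-format inputs stay in row mode while hive.vectorized.use.vector.serde.deserialize is false — which is exactly what the enabledConditionsNotMet entries above record.)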
+ Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -140,6 +164,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -180,6 +208,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -234,6 +266,10 @@ STAGE PLANS: outputColumnNames: l_orderkey Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: l_orderkey (type: int) mode: hash outputColumnNames: _col0 @@ -243,8 +279,20 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 @@ -286,18 +334,22 @@ POSTHOOK: Input: default@lineitem 61336 8855 64128 9141 82704 7721 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-19 depends on stages: Stage-1 @@ -326,6 +378,10 @@ STAGE PLANS: predicate: l_partkey is not null (type: boolean) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: l_partkey (type: int) mode: 
hash outputColumnNames: _col0 @@ -335,8 +391,20 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 @@ -389,6 +457,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -432,6 +504,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -472,6 +548,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -522,6 +602,10 @@ STAGE PLANS: predicate: l_partkey is not null (type: boolean) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: l_partkey (type: int) mode: hash outputColumnNames: _col0 @@ -531,8 +615,20 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 @@ -580,6 +676,10 @@ STAGE PLANS: outputColumnNames: _col2 Statistics: Num rows: 110 Data size: 13198 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: _col2 (type: int) mode: hash outputColumnNames: 
_col0 @@ -589,10 +689,22 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 110 Data size: 13198 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 @@ -640,6 +752,10 @@ STAGE PLANS: outputColumnNames: _col0, _col3 Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: _col0 (type: int), _col3 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -649,10 +765,22 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out index e5a62a3..b6cdce1 100644 --- ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out +++ ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out @@ -208,7 +208,7 @@ stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@store -PREHOOK: query: explain select +PREHOOK: query: explain vectorization select s_state, count(1) from store_sales, store, @@ -220,7 +220,7 @@ PREHOOK: query: explain select order by s_state limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select +POSTHOOK: query: explain vectorization select s_state, count(1) from store_sales, store, @@ -232,6 +232,10 @@ POSTHOOK: query: explain select order by s_state limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -275,6 +279,13 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: string) + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: 
hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Join Operator condition map: @@ -316,6 +327,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Join Operator condition map: @@ -353,6 +371,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -377,6 +403,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) diff --git ql/src/test/results/clientpositive/vector_multi_insert.q.out ql/src/test/results/clientpositive/vector_multi_insert.q.out index e9f106d..4013cd4 100644 --- ql/src/test/results/clientpositive/vector_multi_insert.q.out +++ ql/src/test/results/clientpositive/vector_multi_insert.q.out @@ -65,16 +65,20 @@ POSTHOOK: query: analyze table orc1 compute statistics POSTHOOK: type: QUERY POSTHOOK: Input: default@orc1 POSTHOOK: Output: default@orc1 -PREHOOK: query: explain from orc1 a +PREHOOK: query: explain vectorization from orc1 a insert overwrite table orc_rn1 select a.* where a.rn < 100 insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000 insert overwrite table orc_rn3 select a.* where a.rn >= 1000 PREHOOK: type: QUERY -POSTHOOK: query: explain from orc1 a +POSTHOOK: query: explain vectorization from orc1 a insert overwrite table orc_rn1 select a.* where a.rn < 100 insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000 insert overwrite table orc_rn3 select a.* where a.rn >= 1000 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-3 is a root stage Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7 @@ -152,6 +156,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn3 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-9 Conditional Operator diff --git ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out index 8845cb2..d9591d0 100644 --- ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out +++ ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out @@ -1,7 +1,11 @@ -PREHOOK: query: explain SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint) +PREHOOK: query: explain vectorization SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint) PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint) +POSTHOOK: query: explain vectorization SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -27,6 +31,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: Cannot vectorize IN() - casting a column is not supported. Column type is int but the common type is bigint + vectorized: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_non_string_partition.q.out ql/src/test/results/clientpositive/vector_non_string_partition.q.out index 11677b7..1d60bf2 100644 --- ql/src/test/results/clientpositive/vector_non_string_partition.q.out +++ ql/src/test/results/clientpositive/vector_non_string_partition.q.out @@ -27,10 +27,14 @@ POSTHOOK: query: SHOW PARTITIONS non_string_part POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@non_string_part ctinyint=__HIVE_DEFAULT_PARTITION__ -PREHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -42,20 +46,48 @@ STAGE PLANS: TableScan alias: non_string_part Statistics: Num rows: 3073 Data size: 339150 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean predicate: (cint > 0) (type: boolean) Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), ctinyint (type: tinyint) outputColumnNames: 
_col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 4] Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: tinyint) @@ -98,10 +130,14 @@ POSTHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ 799471 NULL 1248059 NULL 1286921 NULL -PREHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -113,19 +149,47 @@ STAGE PLANS: TableScan alias: non_string_part Statistics: Num rows: 3073 Data size: 339150 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean predicate: (cint > 0) (type: boolean) Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cstring1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS 
true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) diff --git ql/src/test/results/clientpositive/vector_null_projection.q.out ql/src/test/results/clientpositive/vector_null_projection.q.out index aa923a6..94aea2f 100644 --- ql/src/test/results/clientpositive/vector_null_projection.q.out +++ ql/src/test/results/clientpositive/vector_null_projection.q.out @@ -28,12 +28,16 @@ POSTHOOK: query: insert into table b values('aaa') POSTHOOK: type: QUERY POSTHOOK: Output: default@b POSTHOOK: Lineage: b.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select NULL from a PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select NULL from a POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -56,6 +60,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type void of Const void null not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -72,12 +82,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@a #### A masked pattern was here #### NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select NULL as x from a union distinct select NULL as x from b PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select NULL as x from a union distinct select NULL as x from b POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -96,6 +110,10 @@ STAGE PLANS: Select Operator Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: null (type: void) mode: hash outputColumnNames: _col0 @@ -115,6 +133,10 @@ STAGE PLANS: Select Operator Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + Group By Vectorization: + vectorOutput: false + 
native: false + projectedOutputColumns: null keys: null (type: void) mode: hash outputColumnNames: _col0 @@ -124,8 +146,19 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: void) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: void) mode: mergepartial outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/vector_nvl.q.out ql/src/test/results/clientpositive/vector_nvl.q.out index 8330810..08cc168 100644 --- ql/src/test/results/clientpositive/vector_nvl.q.out +++ ql/src/test/results/clientpositive/vector_nvl.q.out @@ -1,13 +1,17 @@ -PREHOOK: query: EXPLAIN SELECT cdouble, nvl(cdouble, 100) as n +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, nvl(cdouble, 100) as n FROM alltypesorc WHERE (cdouble IS NULL) LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdouble, nvl(cdouble, 100) as n +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, nvl(cdouble, 100) as n FROM alltypesorc WHERE (cdouble IS NULL) LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -19,24 +23,50 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNull(col 5) -> boolean predicate: cdouble is null (type: boolean) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: null (type: double), 100.0 (type: double) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 13] + selectExpressions: ConstantVectorExpression(val null) -> 12:double, ConstantVectorExpression(val 100.0) -> 13:double Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: 
true Stage: Stage-0 Fetch Operator @@ -68,14 +98,18 @@ NULL 100.0 NULL 100.0 NULL 100.0 NULL 100.0 -PREHOOK: query: EXPLAIN SELECT cfloat, nvl(cfloat, 1) as n +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, nvl(cfloat, 1) as n FROM alltypesorc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cfloat, nvl(cfloat, 1) as n +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, nvl(cfloat, 1) as n FROM alltypesorc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -87,21 +121,43 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cfloat (type: float), NVL(cfloat,1) (type: float) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 13] + selectExpressions: VectorCoalesce(columns [4, 12])(children: col 4, ConstantVectorExpression(val 1.0) -> 12:double) -> 13:float Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -131,33 +187,72 @@ NULL 1.0 27.0 27.0 -11.0 -11.0 61.0 61.0 -PREHOOK: query: EXPLAIN SELECT nvl(null, 10) as n +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT nvl(null, 10) as n FROM alltypesorc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT nvl(null, 10) as n +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT nvl(null, 10) as n FROM alltypesorc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Select Operator + expressions: 10 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12] + selectExpressions: ConstantVectorExpression(val 10) -> 12:long + Statistics: Num rows: 12288 Data size: 49152 Basic stats: COMPLETE Column stats: COMPLETE + Limit + 
Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 10 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 49152 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: SELECT nvl(null, 10) as n FROM alltypesorc @@ -181,33 +276,55 @@ POSTHOOK: Input: default@alltypesorc 10 10 10 -PREHOOK: query: EXPLAIN SELECT nvl(null, null) as n +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT nvl(null, null) as n FROM alltypesorc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT nvl(null, null) as n +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT nvl(null, null) as n FROM alltypesorc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: null (type: void) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type void of Const void null not supported + vectorized: false + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: null (type: void) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 4 Basic stats: COMPLETE Column stats: 
COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: SELECT nvl(null, null) as n FROM alltypesorc diff --git ql/src/test/results/clientpositive/vector_orderby_5.q.out ql/src/test/results/clientpositive/vector_orderby_5.q.out index 4840b41..ca07e9e 100644 --- ql/src/test/results/clientpositive/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/vector_orderby_5.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select bo, max(b) from vectortab2korc group by bo order by bo desc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select bo, max(b) from vectortab2korc group by bo order by bo desc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -119,12 +123,26 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: bo (type: boolean), b (type: bigint) outputColumnNames: bo, b + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [7, 3] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(b) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 7 + native: false + projectedOutputColumns: [0] keys: bo (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -133,12 +151,33 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group 
By Operator aggregations: max(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 @@ -159,6 +198,14 @@ STAGE PLANS: sort order: - Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint) diff --git ql/src/test/results/clientpositive/vector_outer_join0.q.out ql/src/test/results/clientpositive/vector_outer_join0.q.out index a8765cd..38772cb 100644 --- ql/src/test/results/clientpositive/vector_outer_join0.q.out +++ ql/src/test/results/clientpositive/vector_outer_join0.q.out @@ -58,12 +58,16 @@ POSTHOOK: Input: default@orc_table_2 4 FOUR NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -96,9 +100,16 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -106,16 +117,38 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, string Local Work: Map Reduce Local Work @@ -141,12 +174,16 @@ one 1 NULL NULL one 1 NULL NULL three 3 3 THREE two 2 2 TWO -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -179,9 +216,16 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -189,16 +233,38 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: string, bigint Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_outer_join1.q.out ql/src/test/results/clientpositive/vector_outer_join1.q.out index 32c335d..3df63dc 100644 --- ql/src/test/results/clientpositive/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/vector_outer_join1.q.out @@ -214,18 +214,22 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 
-1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -258,9 +262,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -268,16 +279,38 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col2 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 Statistics: Num rows: 16 Data size: 3652 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 16 Data size: 3652 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: 
bigint, bigint, bigint, bigint, double, double, string, string, timestamp, timestamp, bigint, bigint Local Work: Map Reduce Local Work @@ -320,18 +353,22 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -364,9 +401,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -374,16 +418,37 @@ STAGE PLANS: keys: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0 Statistics: Num rows: 16 Data size: 3652 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 16 Data size: 3652 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, 
cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -512,7 +577,7 @@ NULL NULL NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -521,7 +586,7 @@ left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -530,6 +595,10 @@ left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-8 is a root stage Stage-3 depends on stages: Stage-8 @@ -577,9 +646,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint), cint (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2] Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -587,6 +663,11 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0 Statistics: Num rows: 16 Data size: 3652 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -595,23 +676,60 @@ STAGE PLANS: keys: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0 Statistics: Num rows: 17 Data size: 4017 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/vector_outer_join2.q.out ql/src/test/results/clientpositive/vector_outer_join2.q.out index 07d6107..ec7de23 100644 --- ql/src/test/results/clientpositive/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/vector_outer_join2.q.out @@ -224,7 +224,7 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -233,7 +233,7 @@ left outer join small_alltypesorc_a hd on hd.cbigint = c.cbigint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -242,6 +242,10 @@ left outer join small_alltypesorc_a hd on hd.cbigint = c.cbigint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-8 is a root stage Stage-3 depends on stages: Stage-8 @@ -289,9 +293,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 20 Data size: 4182 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cint (type: int), cbigint (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3] Statistics: Num rows: 20 Data size: 4182 Basic stats: 
COMPLETE Column stats: NONE Map Join Operator condition map: @@ -299,6 +310,11 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col1 Statistics: Num rows: 22 Data size: 4600 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -307,23 +323,60 @@ STAGE PLANS: keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col1 Statistics: Num rows: 24 Data size: 5060 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2, 3] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/vector_outer_join3.q.out 
ql/src/test/results/clientpositive/vector_outer_join3.q.out index 52271c7..49c658b 100644 --- ql/src/test/results/clientpositive/vector_outer_join3.q.out +++ ql/src/test/results/clientpositive/vector_outer_join3.q.out @@ -224,7 +224,7 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -233,7 +233,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -242,104 +242,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:cd - Fetch Operator - limit: -1 - $hdt$_2:hd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:cd - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - $hdt$_2:hd - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cstring1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cstring1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By 
Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cint (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"}}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[2, 6]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS 
true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 6]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"Group By Vectorization:":{"vectorOutput:":"false","native:":"false","projectedOutputColumns:":"null"},"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -361,7 +264,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a #### A masked pattern was here #### 20 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -370,7 +273,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 PREHOOK: type: QUERY -POSTHOOK: 
query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -379,104 +282,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:cd - Fetch Operator - limit: -1 - $hdt$_2:hd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:cd - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cstring2 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - $hdt$_2:hd - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cstring1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cstring1 (type: string), cstring2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch 
Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cstring2 (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"}}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"children":{"Select Operator":{"expressions:":"cstring1 (type: string), cstring2 (type: string)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS 
true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"Group By Vectorization:":{"vectorOutput:":"false","native:":"false","projectedOutputColumns:":"null"},"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -498,7 +304,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a #### A masked pattern was here #### 28 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -507,7 +313,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -516,104 +322,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:cd - Fetch Operator - limit: -1 - $hdt$_2:hd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:cd - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint), cstring2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num 
rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: bigint), _col3 (type: string) - 1 _col0 (type: bigint), _col1 (type: string) - $hdt$_2:hd - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cstring1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int), _col2 (type: string) - 1 _col0 (type: int), _col1 (type: string) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: bigint), _col3 (type: string) - 1 _col0 (type: bigint), _col1 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int), _col2 (type: string) - 1 _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cbigint (type: bigint), cstring2 (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: 
string)"}}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"children":{"Select Operator":{"expressions:":"cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[2, 3, 6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 3, 6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"Group By Vectorization:":{"vectorOutput:":"false","native:":"false","projectedOutputColumns:":"null"},"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd diff --git ql/src/test/results/clientpositive/vector_outer_join4.q.out ql/src/test/results/clientpositive/vector_outer_join4.q.out index 610f7a7..fce35a1 100644 --- ql/src/test/results/clientpositive/vector_outer_join4.q.out +++ ql/src/test/results/clientpositive/vector_outer_join4.q.out @@ -244,79 +244,19 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-4 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:cd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:cd - TableScan - alias: cd - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: 
int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col2 (type: int) - 1 _col2 (type: int) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 - Statistics: Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)","outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col2 (type: int)"}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11]"},"children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)","outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col2 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":["bigint","bigint","bigint","bigint","double","double","string","string","timestamp","timestamp","bigint","bigint"]}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -385,79 +325,19 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 
1969-12-31 16:00:07.395 false false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-4 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:hd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:hd - TableScan - alias: hd - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","TableScan 
Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd @@ -882,7 +762,7 @@ NULL NULL NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -891,7 +771,7 @@ left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -900,104 +780,7 @@ left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:cd - Fetch Operator - limit: -1 - $hdt$_2:hd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:cd - TableScan - alias: cd - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - 
expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - $hdt$_2:hd - TableScan - alias: hd - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Statistics: Num rows: 36 Data size: 8082 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cint (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"}}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: 
COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cint (type: int)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 2]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 36 Data size: 8082 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 
2]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"Group By Vectorization:":{"vectorOutput:":"false","native:":"false","projectedOutputColumns:":"null"},"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd diff --git ql/src/test/results/clientpositive/vector_outer_join6.q.out ql/src/test/results/clientpositive/vector_outer_join6.q.out index 7bcb1a9..a910ed2 100644 --- ql/src/test/results/clientpositive/vector_outer_join6.q.out +++ ql/src/test/results/clientpositive/vector_outer_join6.q.out @@ -122,106 +122,15 @@ POSTHOOK: Output: default@TJOIN4 POSTHOOK: Lineage: tjoin4.c1 SIMPLE [(tjoin4_txt)tjoin4_txt.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin4.c2 SIMPLE [(tjoin4_txt)tjoin4_txt.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin4.rnum SIMPLE [(tjoin4_txt)tjoin4_txt.FieldSchema(name:rnum, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:tjoin2 - Fetch Operator - limit: -1 - $hdt$_1:tjoin3 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:tjoin2 - TableScan - alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - $hdt$_1:tjoin3 - TableScan - alias: tjoin3 - Statistics: Num rows: 
2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col2 (type: int) - 1 _col1 (type: int) - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-7":{"ROOT STAGE":"TRUE"},"Stage-5":{"DEPENDENT STAGES":"Stage-7"},"Stage-0":{"DEPENDENT STAGES":"Stage-5"}},"STAGE PLANS":{"Stage-7":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_0:$hdt$_1:tjoin2":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_1:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_0:$hdt$_1:tjoin2":{"TableScan":{"alias:":"tjoin2","Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"}}}}}}},"$hdt$_1:tjoin3":{"TableScan":{"alias:":"tjoin3","Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col1 (type: int)"}}}}}}}}}},"Stage-5":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1","Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column 
stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2]"},"children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2","_col3"],"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col2 (type: int), _col3 (type: int)","outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1, 2]"},"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col3"],"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col1 (type: int), _col3 (type: int)","outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1, 2]"},"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":["bigint","bigint"]}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch 
Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY @@ -240,102 +149,15 @@ POSTHOOK: Input: default@tjoin3 0 3 0 1 NULL NULL 2 NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select tj1rnum, tj2rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select tj1rnum, tj2rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:tjoin2 - Fetch Operator - limit: -1 - $hdt$_1:tjoin3 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:tjoin2 - TableScan - alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - $hdt$_1:tjoin3 - TableScan - alias: tjoin3 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map 
Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-7":{"ROOT STAGE":"TRUE"},"Stage-5":{"DEPENDENT STAGES":"Stage-7"},"Stage-0":{"DEPENDENT STAGES":"Stage-5"}},"STAGE PLANS":{"Stage-7":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_0:$hdt$_1:tjoin2":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_1:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_0:$hdt$_1:tjoin2":{"TableScan":{"alias:":"tjoin2","Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"}}}}}}},"$hdt$_1:tjoin3":{"TableScan":{"alias:":"tjoin3","Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"c1 (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col0 (type: int)"}}}}}}}}}},"Stage-5":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1","Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2]"},"children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2","_col3"],"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col2 (type: int), _col3 (type: int)","outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1, 2]"},"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS 
true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":["bigint","bigint"]}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: select tj1rnum, tj2rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_reduce1.q.out ql/src/test/results/clientpositive/vector_reduce1.q.out index f22ac16..9f6969f 100644 --- ql/src/test/results/clientpositive/vector_reduce1.q.out +++ ql/src/test/results/clientpositive/vector_reduce1.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select b from vectortab2korc order by b PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select b from vectortab2korc order by b POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -118,15 +122,39 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: b (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or 
DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) diff --git ql/src/test/results/clientpositive/vector_reduce2.q.out ql/src/test/results/clientpositive/vector_reduce2.q.out index 8f5b618..cc1ac41 100644 --- ql/src/test/results/clientpositive/vector_reduce2.q.out +++ ql/src/test/results/clientpositive/vector_reduce2.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s, i, s2 from vectortab2korc order by s, i, s2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select s, i, s2 from vectortab2korc order by s, i, s2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -118,15 +122,39 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: s (type: string), i (type: int), s2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 2, 9] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string) diff --git ql/src/test/results/clientpositive/vector_reduce3.q.out ql/src/test/results/clientpositive/vector_reduce3.q.out index f4220e3..2a21fd9 100644 --- ql/src/test/results/clientpositive/vector_reduce3.q.out +++ ql/src/test/results/clientpositive/vector_reduce3.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s from vectortab2korc order by s PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select s from vectortab2korc order by s POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -118,15 +122,39 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: s (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) diff --git ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out index 
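
Editor's note: the vector_reduce1/2/3 plans above all show "Select Vectorization: native: true" with a projectedOutputColumns list ([3] for "select b", [8] for "select s"). The reason a vectorized SELECT is nearly free is that it only rewrites the batch's projection index array; no column data moves. A minimal sketch under that assumption (simplified batch layout, illustrative names, not Hive's VectorizedRowBatch):

import java.util.Arrays;

// Projection as index remapping over a columnar batch.
public class ProjectionSketch {
    static final class Batch {
        long[][] cols;   // one array per column
        int[] projected; // which columns are visible, and in what order
        Batch(long[][] cols) { this.cols = cols; this.projected = new int[]{0, 1, 2, 3}; }
    }

    public static void main(String[] args) {
        Batch batch = new Batch(new long[][] {
            {1, 2}, {10, 20}, {100, 200}, {1000, 2000}
        });
        batch.projected = new int[]{3};  // "select b" -> project column 3 only
        for (int c : batch.projected) {
            System.out.println(Arrays.toString(batch.cols[c])); // [1000, 2000]
        }
    }
}
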
5d7000a..7094c9c 100644 --- ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out +++ ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out @@ -14,20 +14,24 @@ POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.F POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cdouble, cdecimal1, cdecimal2, min(cdecimal1) as min_decimal1 FROM decimal_test WHERE cdecimal1 is not null and cdecimal2 is not null GROUP BY cint, cdouble, cdecimal1, cdecimal2 ORDER BY cint, cdouble, cdecimal1, cdecimal2 LIMIT 50 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cdouble, cdecimal1, cdecimal2, min(cdecimal1) as min_decimal1 FROM decimal_test WHERE cdecimal1 is not null and cdecimal2 is not null GROUP BY cint, cdouble, cdecimal1, cdecimal2 ORDER BY cint, cdouble, cdecimal1, cdecimal2 LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -40,11 +44,25 @@ STAGE PLANS: TableScan alias: decimal_test Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 4)(children: CastDecimalToBoolean(col 2) -> 4:Boolean) -> boolean, SelectColumnIsNotNull(col 4)(children: CastDecimalToBoolean(col 3) -> 4:Boolean) -> boolean) -> boolean predicate: (cdecimal1 is not null and cdecimal2 is not null) (type: boolean) Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(cdecimal1) + Group By Vectorization: + aggregators: VectorUDAFMinDecimal(col 2) -> decimal(20,10) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1, col 2, col 3 + native: false + projectedOutputColumns: [0] keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -53,13 +71,34 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) sort order: ++++ Map-reduce partition columns: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: NONE TopN 
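
Editor's note: the Filter Vectorization entry above (FilterExprAndExpr over two SelectColumnIsNotNull children) evaluates the conjunction by narrowing a selection vector of surviving row indexes rather than materializing boolean columns. A minimal sketch of that mechanism, using plain arrays and an assumed null-flag layout instead of Hive's VectorizedRowBatch; names are illustrative.

import java.util.Arrays;

// Conjunctive filtering via an in-place selection vector.
public class SelectionVectorFilter {
    // Keep only rows whose null flag is clear; returns the new selected size.
    static int selectNotNull(boolean[] isNull, int[] sel, int size) {
        int newSize = 0;
        for (int j = 0; j < size; j++) {
            int row = sel[j];
            if (!isNull[row]) {
                sel[newSize++] = row; // compact survivors in place
            }
        }
        return newSize;
    }

    public static void main(String[] args) {
        // Null flags standing in for "cdecimal1 is not null and cdecimal2 is not null".
        boolean[] col1Null = {false, true, false, false};
        boolean[] col2Null = {false, false, true, false};
        int[] sel = {0, 1, 2, 3};
        int size = sel.length;
        size = selectNotNull(col1Null, sel, size); // first conjunct
        size = selectNotNull(col2Null, sel, size); // second sees only survivors
        System.out.println(Arrays.toString(Arrays.copyOf(sel, size))); // [0, 3]
    }
}

Because each conjunct only visits rows that survived the previous one, an AND chain gets cheaper as it goes, which is why the filter can stay native even with nested cast children.
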
Hash Memory Usage: 0.1 value expressions: _col4 (type: decimal(20,10)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: int), KEY._col1 (type: double), KEY._col2 (type: decimal(20,10)), KEY._col3 (type: decimal(23,14)) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -81,6 +120,14 @@ STAGE PLANS: Statistics: Num rows: 3051 Data size: 720036 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: decimal(20,10)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: decimal(20,10)), KEY.reducesinkkey3 (type: decimal(23,14)), VALUE._col0 (type: decimal(20,10)) diff --git ql/src/test/results/clientpositive/vector_string_concat.q.out ql/src/test/results/clientpositive/vector_string_concat.q.out index 30ee10f..a06c09e 100644 --- ql/src/test/results/clientpositive/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/vector_string_concat.q.out @@ -95,16 +95,20 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT s AS `string`, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS `none_z_rtrim_str` FROM over1korc LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT s AS `string`, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS `none_z_rtrim_str` FROM over1korc LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -116,21 +120,43 @@ STAGE PLANS: TableScan alias: over1korc Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Select Operator expressions: s (type: string), concat(concat(' ', s), ' ') 
(type: string), concat(concat('|', rtrim(concat(concat(' ', s), ' '))), '|') (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [7, 12, 11] + selectExpressions: StringGroupColConcatStringScalar(col 11, val )(children: StringScalarConcatStringGroupCol(val , col 7) -> 11:String_Family) -> 12:String_Family, StringGroupColConcatStringScalar(col 13, val |)(children: StringScalarConcatStringGroupCol(val |, col 11)(children: StringRTrim(col 13)(children: StringGroupColConcatStringScalar(col 11, val )(children: StringScalarConcatStringGroupCol(val , col 7) -> 11:String_Family) -> 13:String_Family) -> 11:String) -> 13:String_Family) -> 11:String_Family Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -275,20 +301,24 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field` FROM vectortab2korc GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) ORDER BY `field` LIMIT 50 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field` FROM vectortab2korc GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) ORDER BY `field` LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -301,11 +331,25 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: concat(concat(concat('Quarter ', 
UDFToString(UDFToInteger(((UDFToDouble((month(dt) - 1)) / 3.0) + 1.0)))), '-'), UDFToString(year(dt))) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [19] + selectExpressions: StringGroupConcatColCol(col 17, col 18)(children: StringGroupColConcatStringScalar(col 18, val -)(children: StringScalarConcatStringGroupCol(val Quarter , col 17)(children: CastLongToString(col 13)(children: CastDoubleToLong(col 15)(children: DoubleColAddDoubleScalar(col 16, val 1.0)(children: DoubleColDivideDoubleScalar(col 15, val 3.0)(children: CastLongToDouble(col 14)(children: LongColSubtractLongScalar(col 13, val 1)(children: VectorUDFMonthDate(col 12, field MONTH) -> 13:long) -> 14:long) -> 15:double) -> 16:double) -> 15:double) -> 13:long) -> 17:String) -> 18:String_Family) -> 17:String_Family, CastLongToString(col 13)(children: VectorUDFYearDate(col 12, field YEAR) -> 13:long) -> 18:String) -> 19:String_Family Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 19 + native: false + projectedOutputColumns: [] keys: _col0 (type: string) mode: hash outputColumnNames: _col0 @@ -314,11 +358,32 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 @@ -339,6 +404,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) diff --git ql/src/test/results/clientpositive/vector_string_decimal.q.out ql/src/test/results/clientpositive/vector_string_decimal.q.out index 74600cd..3540635 100644 --- 
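
Editor's note: the deeply nested selectExpressions above (vector_string_concat) repeatedly write into and reuse scratch columns 11, 12, and 13: each inner expression lands its result in a scratch output column that an outer expression then consumes. A simplified sketch of that evaluation style, assuming String arrays stand in for Hive's BytesColumnVector; the scratch names mirror the plan's column numbers but are otherwise invented.

import java.util.Arrays;

// Nested expression evaluation into reusable scratch columns.
public class ScratchColumnConcat {
    static void concatScalarCol(String scalar, String[] in, String[] out, int n) {
        for (int i = 0; i < n; i++) out[i] = scalar + in[i]; // concat(' ', s)
    }

    static void concatColScalar(String[] in, String scalar, String[] out, int n) {
        for (int i = 0; i < n; i++) out[i] = in[i] + scalar; // concat(.., ' ')
    }

    public static void main(String[] args) {
        String[] s = {"alice", "bob"};       // input column (col 7 in the plan)
        String[] scratch11 = new String[2];  // scratch columns allocated once
        String[] scratch12 = new String[2];  // per batch, reused across rows
        concatScalarCol(" ", s, scratch11, 2);          // inner concat -> "11"
        concatColScalar(scratch11, " ", scratch12, 2);  // outer concat -> "12"
        System.out.println(Arrays.toString(scratch12)); // [ alice ,  bob ]
    }
}
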
ql/src/test/results/clientpositive/vector_string_decimal.q.out +++ ql/src/test/results/clientpositive/vector_string_decimal.q.out @@ -38,12 +38,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@staging POSTHOOK: Output: default@orc_decimal POSTHOOK: Lineage: orc_decimal.id SIMPLE [(staging)staging.FieldSchema(name:id, type:decimal(18,0), comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from orc_decimal where id in ('100000000', '200000000') PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from orc_decimal where id in ('100000000', '200000000') POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -69,6 +73,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: Cannot vectorize IN() - casting a column is not supported. Column type is decimal(18,0) but the common type is string + vectorized: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_struct_in.q.out ql/src/test/results/clientpositive/vector_struct_in.q.out index c1ce773..07923ea 100644 --- ql/src/test/results/clientpositive/vector_struct_in.q.out +++ ql/src/test/results/clientpositive/vector_struct_in.q.out @@ -14,7 +14,7 @@ POSTHOOK: type: QUERY POSTHOOK: Output: default@test_1 POSTHOOK: Lineage: test_1.id SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_1.lineid SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from test_1 where struct(`id`, `lineid`) IN ( struct('two','3'), @@ -28,7 +28,7 @@ struct('nine','1'), struct('ten','1') ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from test_1 where struct(`id`, `lineid`) IN ( struct('two','3'), @@ -42,6 +42,10 @@ struct('nine','1'), struct('ten','1') ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -53,21 +57,43 @@ STAGE PLANS: TableScan alias: test_1 Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, BYTES], structColumnMap [0, 1]) -> boolean predicate: (struct(id,lineid)) IN (const struct('two','3'), const struct('three','1'), const struct('one','1'), const struct('five','2'), const struct('six','1'), const struct('eight','1'), const struct('seven','1'), const struct('nine','1'), const struct('ten','1')) (type: boolean) Statistics: 
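
Editor's note: the vector_string_decimal plan above is the one negative case in this batch; its notVectorizedReason says IN() cannot be vectorized because the decimal(18,0) column would have to be cast per row to the string common type. Writing the IN-list literals in the column's own numeric type should avoid that implicit cast. The sketch below shows why a type-matched IN probe is cheap, assuming unscaled longs as the decimal(18,0) representation; this is an illustrative model, not Hive's actual IN expression.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

// Type-matched IN-list membership over a columnar batch.
public class DecimalInList {
    public static void main(String[] args) {
        Set<Long> inList = new HashSet<>(Arrays.asList(100000000L, 200000000L));
        long[] id = {100000000L, 5L, 200000000L}; // unscaled decimal(18,0) values
        boolean[] out = new boolean[id.length];
        for (int i = 0; i < id.length; i++) {
            out[i] = inList.contains(id[i]);       // one hash probe per row, no cast
        }
        System.out.println(Arrays.toString(out));  // [true, false, true]
    }
}
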
Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: string), lineid (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -107,7 +133,7 @@ POSTHOOK: Input: default@test_1 #### A masked pattern was here #### one 1 seven 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct('two','3'), @@ -121,7 +147,7 @@ struct('nine','1'), struct('ten','1') ) as b from test_1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct('two','3'), @@ -135,6 +161,10 @@ struct('nine','1'), struct('ten','1') ) as b from test_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -146,18 +176,37 @@ STAGE PLANS: TableScan alias: test_1 Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: id (type: string), lineid (type: string), (struct(id,lineid)) IN (const struct('two','3'), const struct('three','1'), const struct('one','1'), const struct('five','2'), const struct('six','1'), const struct('eight','1'), const struct('seven','1'), const struct('nine','1'), const struct('ten','1')) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 3] + selectExpressions: StructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, BYTES], structColumnMap [0, 1]) -> 3:boolean Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -213,7 +262,7 
@@ POSTHOOK: type: QUERY POSTHOOK: Output: default@test_2 POSTHOOK: Lineage: test_2.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_2.lineid EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from test_2 where struct(`id`, `lineid`) IN ( struct(2,3), @@ -227,7 +276,7 @@ struct(9,1), struct(10,1) ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from test_2 where struct(`id`, `lineid`) IN ( struct(2,3), @@ -241,6 +290,10 @@ struct(9,1), struct(10,1) ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -252,21 +305,43 @@ STAGE PLANS: TableScan alias: test_2 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [LONG, LONG], structColumnMap [0, 1]) -> boolean predicate: (struct(id,lineid)) IN (const struct(2,3), const struct(3,1), const struct(1,1), const struct(5,2), const struct(6,1), const struct(8,1), const struct(7,1), const struct(9,1), const struct(10,1)) (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), lineid (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -306,7 +381,7 @@ POSTHOOK: Input: default@test_2 #### A masked pattern was here #### 1 1 7 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct(2,3), @@ -320,7 +395,7 @@ struct(9,1), struct(10,1) ) as b from test_2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct(2,3), @@ -334,6 +409,10 @@ struct(9,1), struct(10,1) ) as b from test_2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -345,18 +424,37 @@ STAGE PLANS: TableScan alias: test_2 Statistics: Num rows: 2 
Data size: 16 Basic stats: COMPLETE Column stats: NONE
+           TableScan Vectorization:
+               native: true
+               projectedOutputColumns: [0, 1]
            Select Operator
              expressions: id (type: int), lineid (type: int), (struct(id,lineid)) IN (const struct(2,3), const struct(3,1), const struct(1,1), const struct(5,2), const struct(6,1), const struct(8,1), const struct(7,1), const struct(9,1), const struct(10,1)) (type: boolean)
              outputColumnNames: _col0, _col1, _col2
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [0, 1, 3]
+                 selectExpressions: StructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [LONG, LONG], structColumnMap [0, 1]) -> 3:boolean
              Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
              File Output Operator
                compressed: false
+               File Sink Vectorization:
+                   className: VectorFileSinkOperator
+                   native: false
                Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true

  Stage: Stage-0
    Fetch Operator
@@ -412,7 +510,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Output: default@test_3
POSTHOOK: Lineage: test_3.id SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
POSTHOOK: Lineage: test_3.lineid EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select * from test_3 where struct(`id`, `lineid`)
IN (
struct('two',3),
@@ -426,7 +524,7 @@ struct('nine',1),
struct('ten',1)
)
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select * from test_3 where struct(`id`, `lineid`)
IN (
struct('two',3),
@@ -440,6 +538,10 @@ struct('nine',1),
struct('ten',1)
)
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -451,21 +553,43 @@ STAGE PLANS:
          TableScan
            alias: test_3
            Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+           TableScan Vectorization:
+               native: true
+               projectedOutputColumns: [0, 1]
            Filter Operator
+             Filter Vectorization:
+                 className: VectorFilterOperator
+                 native: true
+                 predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, LONG], structColumnMap [0, 1]) -> boolean
              predicate: (struct(id,lineid)) IN (const struct('two',3), const struct('three',1), const struct('one',1), const struct('five',2), const struct('six',1), const struct('eight',1), const struct('seven',1), const struct('nine',1), const struct('ten',1)) (type: boolean)
              Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: id (type: string), lineid (type: int)
                outputColumnNames: _col0, _col1
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0, 1]
                Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true

  Stage: Stage-0
    Fetch Operator
@@ -505,7 +629,7 @@ POSTHOOK: Input: default@test_3
#### A masked pattern was here ####
one 1
seven 1
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select `id`, `lineid`, struct(`id`, `lineid`)
IN (
struct('two',3),
@@ -519,7 +643,7 @@ struct('nine',1),
struct('ten',1)
) as b from test_3
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select `id`, `lineid`, struct(`id`, `lineid`)
IN (
struct('two',3),
@@ -533,6 +657,10 @@ struct('nine',1),
struct('ten',1)
) as b from test_3
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -544,18 +672,37 @@ STAGE PLANS:
          TableScan
            alias: test_3
            Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+           TableScan Vectorization:
+               native: true
+               projectedOutputColumns: [0, 1]
            Select Operator
              expressions: id (type: string), lineid (type: int), (struct(id,lineid)) IN (const struct('two',3), const struct('three',1), const struct('one',1), const struct('five',2), const struct('six',1), const struct('eight',1), const struct('seven',1), const struct('nine',1), const struct('ten',1)) (type: boolean)
              outputColumnNames: _col0, _col1, _col2
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [0, 1, 3]
+                 selectExpressions: StructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, LONG], structColumnMap [0, 1]) -> 3:boolean
              Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
              File Output Operator
                compressed: false
+               File Sink Vectorization:
+                   className: VectorFileSinkOperator
+                   native: false
                Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true

  Stage: Stage-0
    Fetch Operator
@@ -612,7 +759,7 @@ POSTHOOK: Output: default@test_4
POSTHOOK: Lineage: test_4.my_bigint EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
POSTHOOK: Lineage: test_4.my_double EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
POSTHOOK: Lineage: test_4.my_string SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
IN (
struct(1L, "a", 1.5D),
@@ -627,7 +774,7 @@ struct(1L, "a", 0.5D),
struct(3L, "b", 1.5D)
)
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
IN (
struct(1L, "a", 1.5D),
@@ -642,6 +789,10 @@ struct(1L, "a", 0.5D),
struct(3L, "b", 1.5D)
)
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -653,21 +804,43 @@ STAGE PLANS:
          TableScan
            alias: test_4
            Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
+           TableScan Vectorization:
+               native: true
+               projectedOutputColumns: [0, 1, 2]
            Filter Operator
+             Filter Vectorization:
+                 className: VectorFilterOperator
+                 native: true
+                 predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1, col 2], fieldVectorColumnTypes [LONG, BYTES, DOUBLE], structColumnMap [0, 1, 2]) -> boolean
              predicate: (struct(my_bigint,my_string,my_double)) IN (const struct(1,'a',1.5), const struct(1,'b',-0.5), const struct(3,'b',1.5), const struct(1,'d',1.5), const struct(1,'c',1.5), const struct(1,'b',2.5), const struct(1,'b',0.5), const struct(5,'b',1.5), const struct(1,'a',0.5), const struct(3,'b',1.5)) (type: boolean)
              Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double)
                outputColumnNames: _col0, _col1, _col2
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0, 1, 2]
                Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true

  Stage: Stage-0
    Fetch Operator
@@ -708,7 +881,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@test_4
#### A masked pattern was here ####
1 a 0.5
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
IN (
struct(1L, "a", 1.5D),
@@ -723,7 +896,7 @@ struct(1L, "a", 0.5D),
struct(3L, "b", 1.5D)
) as b from test_4
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
IN (
struct(1L, "a", 1.5D),
@@ -738,6 +911,10 @@ struct(1L, "a", 0.5D),
struct(3L, "b", 1.5D)
) as b from test_4
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -749,18 +926,37 @@ STAGE PLANS:
          TableScan
            alias: test_4
            Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
+           TableScan Vectorization:
+               native: true
+               projectedOutputColumns: [0, 1, 2]
            Select Operator
              expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double), (struct(my_bigint,my_string,my_double)) IN (const struct(1,'a',1.5), const struct(1,'b',-0.5), const struct(3,'b',1.5), const struct(1,'d',1.5), const struct(1,'c',1.5), const struct(1,'b',2.5), const struct(1,'b',0.5), const struct(5,'b',1.5), const struct(1,'a',0.5), const struct(3,'b',1.5)) (type: boolean)
              outputColumnNames: _col0, _col1, _col2, _col3
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [0, 1, 2, 4]
+                 selectExpressions: StructColumnInList(structExpressions [col 0, col 1, col 2], fieldVectorColumnTypes [LONG, BYTES, DOUBLE], structColumnMap [0, 1, 2]) -> 4:boolean
              Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
              File Output Operator
                compressed: false
+               File Sink Vectorization:
+                   className: VectorFileSinkOperator
+                   native: false
                Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
                table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true

  Stage: Stage-0
    Fetch Operator
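The plans above show the multi-column IN rewrite: struct(id, lineid) IN (...) compiles to StructColumnInList in projections and FilterStructColumnInList in filters, gathering the struct fields from the member column vectors of each batch. Below is a minimal sketch of that evaluation strategy in plain Java with illustrative names; it is not the actual Hive implementation, which probes serialized scratch keys rather than boxed lists.

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class StructInListSketch {

    // Set built once from the constant struct tuples of the IN list.
    private final Set<List<Object>> inSet = new HashSet<>();

    public StructInListSketch(List<? extends List<?>> constantStructs) {
        for (List<?> s : constantStructs) {
            inSet.add(new ArrayList<Object>(s));
        }
    }

    /** Evaluates struct(col0, col1, ...) IN (...) for a whole batch at once. */
    public boolean[] evaluate(Object[][] columns, int batchSize) {
        boolean[] result = new boolean[batchSize];
        for (int row = 0; row < batchSize; row++) {
            Object[] key = new Object[columns.length];
            for (int c = 0; c < columns.length; c++) {
                key[c] = columns[c][row]; // gather this row's struct fields
            }
            result[row] = inSet.contains(Arrays.asList(key));
        }
        return result;
    }

    public static void main(String[] args) {
        List<List<Object>> constants = new ArrayList<>();
        constants.add(Arrays.asList("one", 1));
        constants.add(Arrays.asList("seven", 1));
        StructInListSketch in = new StructInListSketch(constants);
        Object[][] cols = { { "one", "two" }, { 1, 3 } }; // id, lineid columns
        System.out.println(Arrays.toString(in.evaluate(cols, 2))); // [true, false]
    }
}
```

The design point the plans illustrate: the tuple test runs once per row over already-materialized column vectors, so no per-row struct object needs to be constructed.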
diff --git ql/src/test/results/clientpositive/vector_tablesample_rows.q.out ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
index e6f06cd..ad7af88 100644
--- ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
+++ ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
@@ -1,10 +1,14 @@
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select 'key1', 'value1' from alltypesorc tablesample (1 rows)
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select 'key1', 'value1' from alltypesorc tablesample (1 rows)
POSTHOOK: type: QUERY
Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -17,18 +21,37 @@ STAGE PLANS:
            alias: alltypesorc
            Row Limit Per Split: 1
            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE
+           TableScan Vectorization:
+               native: true
+               projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
            Select Operator
              expressions: 'key1' (type: string), 'value1' (type: string)
              outputColumnNames: _col0, _col1
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [12, 13]
+                 selectExpressions: ConstantVectorExpression(val key1) -> 12:string, ConstantVectorExpression(val value1) -> 13:string
              Statistics: Num rows: 12288 Data size: 2187264 Basic stats: COMPLETE Column stats: COMPLETE
              File Output Operator
                compressed: false
+               File Sink Vectorization:
+                   className: VectorFileSinkOperator
+                   native: false
                Statistics: Num rows: 12288 Data size: 2187264 Basic stats: COMPLETE Column stats: COMPLETE
                table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true

  Stage: Stage-0
    Fetch Operator
@@ -54,15 +77,19 @@ POSTHOOK: query: create table decimal_2 (t decimal(18,9)) stored as orc
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@decimal_2
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
insert overwrite table decimal_2
  select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows)
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
insert overwrite table decimal_2
  select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows)
POSTHOOK: type: QUERY
Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
@@ -81,12 +108,23 @@ STAGE PLANS:
            alias: alltypesorc
            Row Limit Per Split: 1
            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE
+           TableScan Vectorization:
+               native: true
+               projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
            Select Operator
              expressions: 17.29 (type: decimal(18,9))
              outputColumnNames: _col0
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [12]
+                 selectExpressions: ConstantVectorExpression(val 17.29) -> 12:decimal(18,9)
              Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE
              File Output Operator
                compressed: false
+               File Sink Vectorization:
+                   className: VectorFileSinkOperator
+                   native: false
                Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE
                table:
                    input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
@@ -94,6 +132,14 @@ STAGE PLANS:
                    serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                    name: default.decimal_2
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true

  Stage: Stage-7
    Conditional Operator
@@ -167,13 +213,17 @@ POSTHOOK: query: drop table decimal_2
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@decimal_2
POSTHOOK: Output: default@decimal_2
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select count(1) from (select * from (Select 1 a) x order by x.a) y
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select count(1) from (select * from (Select 1 a) x order by x.a) y
POSTHOOK: type: QUERY
Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -193,11 +243,22 @@ STAGE PLANS:
              key expressions: 1 (type: int)
              sort order: +
              Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+     Map Vectorization:
+         enabled: false
+#### A masked pattern was here ####
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
          Group By Operator
            aggregations: count(1)
+           Group By Vectorization:
+               vectorOutput: false
+               native: false
+               projectedOutputColumns: null
            mode: hash
            outputColumnNames: _col0
            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
@@ -216,9 +277,21 @@ STAGE PLANS:
              sort order:
              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
              value expressions: _col0 (type: bigint)
+     Map Vectorization:
+         enabled: false
+         enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+         inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
+         Group By Vectorization:
+             vectorOutput: false
+             native: false
+             projectedOutputColumns: null
          mode: mergepartial
          outputColumnNames: _col0
          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
@@ -246,13 +319,17 @@ POSTHOOK: Input: _dummy_database@_dummy_table
#### A masked pattern was here ####
_c0
1
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
create temporary table dual as select 1
PREHOOK: type: CREATETABLE_AS_SELECT
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
create temporary table dual as select 1
POSTHOOK: type: CREATETABLE_AS_SELECT
Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
@@ -284,6 +361,9 @@ STAGE PLANS:
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    name: default.dual
+     Map Vectorization:
+         enabled: false
+#### A masked pattern was here ####

  Stage: Stage-7
    Conditional Operator
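The MR plans above consistently report Reduce Vectorization: enabled: false, with the flag condition met (hive.vectorized.execution.reduce.enabled IS true) but the engine condition not met (hive.execution.engine mr IN [tez, spark] IS false): reduce-side vectorization requires Tez or Spark regardless of the flag. A condensed sketch of that gating, with illustrative names rather than Hive's actual methods:

```java
public class ReduceVectorizationGate {

    // Mirrors the two conditions the explain output prints for each reducer.
    static boolean reduceVectorizationEnabled(boolean reduceFlagEnabled, String engine) {
        // On plain MapReduce the engine check fails, so "enabled" stays false
        // even though the flag itself is on -- exactly the enableConditionsMet /
        // enableConditionsNotMet split shown in the plans.
        return reduceFlagEnabled && (engine.equals("tez") || engine.equals("spark"));
    }

    public static void main(String[] args) {
        System.out.println(reduceVectorizationEnabled(true, "mr"));  // false
        System.out.println(reduceVectorizationEnabled(true, "tez")); // true
    }
}
```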
diff --git ql/src/test/results/clientpositive/vector_udf2.q.out ql/src/test/results/clientpositive/vector_udf2.q.out
index 9e21910..4fa7bd2 100644
--- ql/src/test/results/clientpositive/vector_udf2.q.out
+++ ql/src/test/results/clientpositive/vector_udf2.q.out
@@ -24,7 +24,7 @@ POSTHOOK: Lineage: varchar_udf_2.c1 SIMPLE [(src)src.FieldSchema(name:key, type:
POSTHOOK: Lineage: varchar_udf_2.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: varchar_udf_2.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: varchar_udf_2.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
  c1 LIKE '%38%',
  c2 LIKE 'val_%',
@@ -34,7 +34,7 @@ select
  c3 LIKE '%x38'
from varchar_udf_2 limit 1
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
  c1 LIKE '%38%',
  c2 LIKE 'val_%',
@@ -44,6 +44,10 @@ select
  c3 LIKE '%x38'
from varchar_udf_2 limit 1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -55,21 +59,43 @@ STAGE PLANS:
          TableScan
            alias: varchar_udf_2
            Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+           TableScan Vectorization:
+               native: true
+               projectedOutputColumns: [0, 1, 2, 3]
            Select Operator
              expressions: (c1 like '%38%') (type: boolean), (c2 like 'val_%') (type: boolean), (c3 like '%38') (type: boolean), (c1 like '%3x8%') (type: boolean), (c2 like 'xval_%') (type: boolean), (c3 like '%x38') (type: boolean)
              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [4, 5, 6, 7, 8, 9]
+                 selectExpressions: SelectStringColLikeStringScalar(col 0) -> 4:String_Family, SelectStringColLikeStringScalar(col 1) -> 5:String_Family, SelectStringColLikeStringScalar(col 2) -> 6:String_Family, SelectStringColLikeStringScalar(col 0) -> 7:String_Family, SelectStringColLikeStringScalar(col 1) -> 8:String_Family, SelectStringColLikeStringScalar(col 2) -> 9:String_Family
              Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
              Limit
                Number of rows: 1
+               Limit Vectorization:
+                   className: VectorLimitOperator
+                   native: true
                Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true

  Stage: Stage-0
    Fetch Operator
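Each LIKE in the select list above compiles to a SelectStringColLikeStringScalar expression that fills a 0/1 output column for the whole batch. The toy version below captures the shape of that evaluation; it leans on java.util.regex for brevity, whereas a real vectorized LIKE special-cases prefix/suffix/middle patterns over raw bytes to avoid regex entirely.

```java
import java.util.regex.Pattern;

public class BatchLikeSketch {
    private final Pattern pattern;

    public BatchLikeSketch(String likePattern) {
        // Translate SQL LIKE wildcards to a regex (simplified: no escape handling).
        this.pattern = Pattern.compile(
            likePattern.replace("%", ".*").replace("_", "."));
    }

    /** Fills a long (0/1) output column for the first n rows of the batch. */
    public void evaluate(String[] input, long[] output, int n) {
        for (int i = 0; i < n; i++) {
            output[i] = pattern.matcher(input[i]).matches() ? 1 : 0;
        }
    }

    public static void main(String[] args) {
        long[] out = new long[2];
        new BatchLikeSketch("val_%").evaluate(new String[] { "val_0", "x" }, out, 2);
        System.out.println(out[0] + " " + out[1]); // 1 0
    }
}
```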
diff --git ql/src/test/results/clientpositive/vector_udf3.q.out ql/src/test/results/clientpositive/vector_udf3.q.out
index 7c6a90a..818a888 100644
--- ql/src/test/results/clientpositive/vector_udf3.q.out
+++ ql/src/test/results/clientpositive/vector_udf3.q.out
@@ -4,10 +4,14 @@ PREHOOK: Output: rot13
POSTHOOK: query: CREATE TEMPORARY FUNCTION rot13 as 'hive.it.custom.udfs.GenericUDFRot13'
POSTHOOK: type: CREATEFUNCTION
POSTHOOK: Output: rot13
-PREHOOK: query: EXPLAIN SELECT rot13(cstring1) from alltypesorc
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT rot13(cstring1) from alltypesorc
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT rot13(cstring1) from alltypesorc
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT rot13(cstring1) from alltypesorc
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -19,18 +23,37 @@ STAGE PLANS:
          TableScan
            alias: alltypesorc
            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+           TableScan Vectorization:
+               native: true
+               projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
            Select Operator
              expressions: Rot13(cstring1) (type: string)
              outputColumnNames: _col0
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [12]
+                 selectExpressions: VectorStringRot13(col 6) -> 12:String
              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              File Output Operator
                compressed: false
+               File Sink Vectorization:
+                   className: VectorFileSinkOperator
+                   native: false
                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true

  Stage: Stage-0
    Fetch Operator
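Notably, the custom rot13 UDF is planned as the native VectorStringRot13 expression (and usesVectorUDFAdaptor stays false) rather than being wrapped in an adaptor; in Hive a UDF can advertise a vectorized counterpart, for example via the @VectorizedExpressions annotation. The core per-batch byte transform such an expression would perform looks roughly like this (plain-Java sketch, not the actual test class):

```java
public class Rot13Sketch {

    /** Rotates ASCII letters by 13 positions, leaving other bytes untouched. */
    static byte[] rot13(byte[] in) {
        byte[] out = new byte[in.length];
        for (int i = 0; i < in.length; i++) {
            byte b = in[i];
            if (b >= 'a' && b <= 'z') {
                out[i] = (byte) ('a' + (b - 'a' + 13) % 26);
            } else if (b >= 'A' && b <= 'Z') {
                out[i] = (byte) ('A' + (b - 'A' + 13) % 26);
            } else {
                out[i] = b;
            }
        }
        return out;
    }

    public static void main(String[] args) {
        // A vectorized expression would apply this to each row's byte slice
        // of the string column vector instead of one standalone array.
        System.out.println(new String(rot13("Uryyb".getBytes()))); // Hello
    }
}
```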
diff --git ql/src/test/results/clientpositive/vector_varchar_4.q.out ql/src/test/results/clientpositive/vector_varchar_4.q.out
index f7c9cd0..205c67a 100644
--- ql/src/test/results/clientpositive/vector_varchar_4.q.out
+++ ql/src/test/results/clientpositive/vector_varchar_4.q.out
@@ -121,12 +121,16 @@ POSTHOOK: query: create table varchar_lazy_binary_columnar(vt varchar(10), vsi v
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@varchar_lazy_binary_columnar
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
@@ -144,12 +148,23 @@ STAGE PLANS:
          TableScan
            alias: vectortab2korc
            Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+           TableScan Vectorization:
+               native: true
+               projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
            Select Operator
              expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50))
              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19]
+                 selectExpressions: CastLongToVarChar(col 0, maxLength 10) -> 13:VarChar, CastLongToVarChar(col 1, maxLength 10) -> 14:VarChar, CastLongToVarChar(col 2, maxLength 20) -> 15:VarChar, CastLongToVarChar(col 3, maxLength 30) -> 16:VarChar, VectorUDFAdaptor(CAST( f AS varchar(20))) -> 17:varchar(20), VectorUDFAdaptor(CAST( d AS varchar(20))) -> 18:varchar(20), CastStringGroupToVarChar(col 8, maxLength 50) -> 19:VarChar
              Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
              File Output Operator
                compressed: false
+               File Sink Vectorization:
+                   className: VectorFileSinkOperator
+                   native: false
                Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
                table:
                    input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -157,6 +172,14 @@ STAGE PLANS:
                    serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe
                    name: default.varchar_lazy_binary_columnar
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: true
+         vectorized: true

  Stage: Stage-7
    Conditional Operator
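This plan is a good example of the adaptor fallback: the integer-family casts get native vector expressions (CastLongToVarChar), but the float and double casts are wrapped in VectorUDFAdaptor, which evaluates the row-mode function once per row inside the vectorized pipeline and flips usesVectorUDFAdaptor to true. The adaptor idea in miniature, with illustrative names (Hive's real adaptor wraps a GenericUDF and ObjectInspectors, not a java.util.function.Function):

```java
import java.util.function.Function;

public class UdfAdaptorSketch<T, R> {
    private final Function<T, R> rowUdf;

    public UdfAdaptorSketch(Function<T, R> rowUdf) {
        this.rowUdf = rowUdf;
    }

    /** Per-row evaluation over the batch; no tight primitive inner loop. */
    public void evaluate(T[] inputColumn, R[] outputColumn, int n) {
        for (int i = 0; i < n; i++) {
            outputColumn[i] = rowUdf.apply(inputColumn[i]);
        }
    }

    public static void main(String[] args) {
        UdfAdaptorSketch<Double, String> castToVarchar =
            new UdfAdaptorSketch<>(d -> String.valueOf(d));
        Double[] in = { 1.5, 2.25 };
        String[] out = new String[2];
        castToVarchar.evaluate(in, out, 2);
        System.out.println(out[0] + " " + out[1]); // 1.5 2.25
    }
}
```

The per-row object traffic is why the summary distinguishes allNative from merely vectorized: an adaptor keeps the pipeline vectorized end to end but gives up most of the batch-at-a-time speedup for that one expression.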
diff --git ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out
index c96d04d..a769247 100644
--- ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out
+++ ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out
@@ -124,10 +124,14 @@ POSTHOOK: Output: database:default
POSTHOOK: Output: default@varchar_join1_str_orc
POSTHOOK: Lineage: varchar_join1_str_orc.c1 SIMPLE [(varchar_join1_str)varchar_join1_str.FieldSchema(name:c1, type:int, comment:null), ]
POSTHOOK: Lineage: varchar_join1_str_orc.c2 SIMPLE [(varchar_join1_str)varchar_join1_str.FieldSchema(name:c2, type:string, comment:null), ]
-PREHOOK: query: explain select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1
+PREHOOK: query: explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1
PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1
+POSTHOOK: query: explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -184,8 +188,20 @@ STAGE PLANS:
              Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: varchar(10))
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
      Local Work:
        Map Reduce Local Work
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(10)), VALUE._col1 (type: int), VALUE._col2 (type: varchar(10))
@@ -216,10 +232,14 @@ POSTHOOK: Input: default@varchar_join1_vc1_orc
1 abc 1 abc
2 abc 2 abc
3 abc 3 abc
-PREHOOK: query: explain select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1
+PREHOOK: query: explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1
PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1
+POSTHOOK: query: explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -276,8 +296,20 @@ STAGE PLANS:
              Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: varchar(20))
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
      Local Work:
        Map Reduce Local Work
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(10)), VALUE._col1 (type: int), VALUE._col2 (type: varchar(20))
@@ -310,10 +342,14 @@ POSTHOOK: Input: default@varchar_join1_vc2_orc
1 abc 1 abc
2 abc 2 abc
3 abc 3 abc
-PREHOOK: query: explain select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1
+PREHOOK: query: explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1
PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1
+POSTHOOK: query: explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -370,8 +406,20 @@ STAGE PLANS:
              Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: string)
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
      Local Work:
        Map Reduce Local Work
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(10)), VALUE._col1 (type: int), VALUE._col2 (type: string)
diff --git ql/src/test/results/clientpositive/vector_varchar_simple.q.out ql/src/test/results/clientpositive/vector_varchar_simple.q.out
index b2f5b0c..3ee9a86 100644
--- ql/src/test/results/clientpositive/vector_varchar_simple.q.out
+++ ql/src/test/results/clientpositive/vector_varchar_simple.q.out
@@ -45,16 +45,20 @@ POSTHOOK: Input: default@src
0 val_0
10 val_10
100 val_100
-PREHOOK: query: explain select key, value
+PREHOOK: query: explain vectorization select key, value
from varchar_2
order by key asc
limit 5
PREHOOK: type: QUERY
-POSTHOOK: query: explain select key, value
+POSTHOOK: query: explain vectorization select key, value
from varchar_2
order by key asc
limit 5
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -77,6 +81,18 @@ STAGE PLANS:
              TopN Hash Memory Usage: 0.1
              value expressions: _col1 (type: varchar(20))
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20))
@@ -137,16 +153,20 @@ POSTHOOK: Input: default@src
97 val_97
97 val_97
96 val_96
-PREHOOK: query: explain select key, value
+PREHOOK: query: explain vectorization select key, value
from varchar_2
order by key desc
limit 5
PREHOOK: type: QUERY
-POSTHOOK: query: explain select key, value
+POSTHOOK: query: explain vectorization select key, value
from varchar_2
order by key desc
limit 5
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -169,6 +189,18 @@ STAGE PLANS:
              TopN Hash Memory Usage: 0.1
              value expressions: _col1 (type: varchar(20))
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20))
@@ -230,12 +262,16 @@ POSTHOOK: query: create table varchar_3 (
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@varchar_3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
insert into table varchar_3 select cint from alltypesorc limit 10
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
insert into table varchar_3 select cint from alltypesorc limit 10
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -248,19 +284,46 @@ STAGE PLANS:
          TableScan
            alias: alltypesorc
            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+           TableScan Vectorization:
+               native: true
+               projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
            Select Operator
              expressions: cint (type: int)
              outputColumnNames: _col0
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [2]
              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              Limit
                Number of rows: 10
+               Limit Vectorization:
+                   className: VectorLimitOperator
+                   native: true
                Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  sort order:
+                 Reduce Sink Vectorization:
+                     className: VectorReduceSinkOperator
+                     native: false
+                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false
                  Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE
                  TopN Hash Memory Usage: 0.1
                  value expressions: _col0 (type: int)
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: VALUE._col0 (type: int)
diff --git ql/src/test/results/clientpositive/vector_when_case_null.q.out ql/src/test/results/clientpositive/vector_when_case_null.q.out
index dcff347..0365f52 100644
--- ql/src/test/results/clientpositive/vector_when_case_null.q.out
+++ ql/src/test/results/clientpositive/vector_when_case_null.q.out
@@ -14,12 +14,16 @@ POSTHOOK: type: QUERY
POSTHOOK: Output: default@count_case_groupby
POSTHOOK: Lineage: count_case_groupby.bool EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
POSTHOOK: Lineage: count_case_groupby.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -31,12 +35,27 @@ STAGE PLANS:
          TableScan
            alias: count_case_groupby
            Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+           TableScan Vectorization:
+               native: true
+               projectedOutputColumns: [0, 1]
            Select Operator
              expressions: key (type: string), CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END (type: int)
              outputColumnNames: _col0, _col1
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [0, 3]
+                 selectExpressions: VectorUDFAdaptor(CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END)(children: NotCol(col 1) -> 2:boolean) -> 3:int
              Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: count(_col1)
+               Group By Vectorization:
+                   aggregators: VectorUDAFCount(col 3) -> bigint
+                   className: VectorGroupByOperator
+                   vectorOutput: true
+                   keyExpressions: col 0
+                   native: false
+                   projectedOutputColumns: [0]
                keys: _col0 (type: string)
                mode: hash
                outputColumnNames: _col0, _col1
@@ -45,12 +64,33 @@ STAGE PLANS:
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
+                 Reduce Sink Vectorization:
+                     className: VectorReduceSinkOperator
+                     native: false
+                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                  Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: bigint)
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: true
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
+         Group By Vectorization:
+             vectorOutput: false
+             native: false
+             projectedOutputColumns: null
          keys: KEY._col0 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
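In the plan above, the CASE expression itself needs a VectorUDFAdaptor, but the map-side aggregation still runs as VectorGroupByOperator with VectorUDAFCount over the adaptor's output column. The spirit of that aggregator, as a toy keyed by a string (the real operator maintains per-group aggregation buffers addressed through a hash of the key column):

```java
import java.util.HashMap;
import java.util.Map;

public class VectorCountSketch {
    private final Map<String, Long> counts = new HashMap<>();

    /** One call per batch: bump each group's counter for non-null values. */
    public void aggregateBatch(String[] keys, Integer[] values, int n) {
        for (int i = 0; i < n; i++) {
            if (values[i] != null) { // COUNT skips NULLs, matching the CASE's ELSE NULL
                counts.merge(keys[i], 1L, Long::sum);
            }
        }
    }

    public Map<String, Long> result() {
        return counts;
    }
}
```

This is why the CASE WHEN ... ELSE NULL pattern works under vectorization: NULL rows simply never increment the counter.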
diff --git ql/src/test/results/clientpositive/vectorization_13.q.out ql/src/test/results/clientpositive/vectorization_13.q.out
index 99c99d7..cb57133 100644
--- ql/src/test/results/clientpositive/vectorization_13.q.out
+++ ql/src/test/results/clientpositive/vectorization_13.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT cboolean1,
       ctinyint,
       ctimestamp1,
@@ -31,7 +31,7 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16
LIMIT 40
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT cboolean1,
       ctinyint,
       ctimestamp1,
@@ -64,6 +64,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16
LIMIT 40
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -76,15 +80,34 @@ STAGE PLANS:
          TableScan
            alias: alltypesorc
            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+           TableScan Vectorization:
+               native: true
+               projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
            Filter Operator
+             Filter Vectorization:
+                 className: VectorFilterOperator
+                 native: true
+                 predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 11.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 12.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean
              predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > 11.0) and (UDFToDouble(ctimestamp2) <> 12.0) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean)
              Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
                outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [10, 0, 8, 4, 6]
                Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint)
+                 Group By Vectorization:
+                     aggregators: VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFMinLong(col 0) -> tinyint
+                     className: VectorGroupByOperator
+                     vectorOutput: false
+                     keyExpressions: col 10, col 0, col 8, col 4, col 6
+                     native: false
+                     projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                     vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false
                  keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
                  mode: hash
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -96,9 +119,25 @@ STAGE PLANS:
                    Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint)
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: false
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Group By Operator
          aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5)
+         Group By Vectorization:
+             vectorOutput: false
+             native: false
+             projectedOutputColumns: null
          keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -123,6 +162,14 @@ STAGE PLANS:
              sort order: +++++++++++++++++++++
              Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
              TopN Hash Memory Usage: 0.1
+     Map Vectorization:
+         enabled: false
+         enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+         inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint)
@@ -253,7 +300,7 @@ NULL -63 1969-12-31 16:00:15.436 -63.0 NULL 63 -63 0 -63.0 -0.0 63.0 -5011.839 0
NULL -64 1969-12-31 16:00:11.912 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64
NULL -64 1969-12-31 16:00:12.339 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64
NULL -64 1969-12-31 16:00:13.274 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT cboolean1,
       ctinyint,
       ctimestamp1,
@@ -286,7 +333,7 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16
LIMIT 40
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT cboolean1,
       ctinyint,
       ctimestamp1,
@@ -319,6 +366,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16
LIMIT 40
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -331,15 +382,34 @@ STAGE PLANS:
          TableScan
            alias: alltypesorc
            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+           TableScan Vectorization:
+               native: true
+               projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
            Filter Operator
+             Filter Vectorization:
+                 className: VectorFilterOperator
+                 native: true
+                 predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val -1.388)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val -1.3359999999999999)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean
              predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -1.388) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean)
              Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
                outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [10, 0, 8, 4, 6]
                Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint)
+                 Group By Vectorization:
+                     aggregators: VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFMinLong(col 0) -> tinyint
+                     className: VectorGroupByOperator
+                     vectorOutput: false
+                     keyExpressions: col 10, col 0, col 8, col 4, col 6
+                     native: false
+                     projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                     vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false
                  keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
                  mode: hash
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -351,9 +421,25 @@ STAGE PLANS:
                    Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint)
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: false
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Group By Operator
          aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5)
+         Group By Vectorization:
+             vectorOutput: false
+             native: false
+             projectedOutputColumns: null
          keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -378,6 +464,14 @@ STAGE PLANS:
              sort order: +++++++++++++++++++++
              Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
              TopN Hash Memory Usage: 0.1
+     Map Vectorization:
+         enabled: false
+         enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+         inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint)
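The vectorization_13 plans show why groupByVectorOutput flips to false here: stddev_pop carries its partial result between map and reduce as a struct (the _col7/_col8 struct value expressions above), and a vectorized group-by cannot emit STRUCT-typed batches, which is exactly what the vectorOutputConditionsNotMet lines report. The shape of that partial result, using the textbook incremental update that variance-family aggregates typically apply (a sketch, not Hive's GenericUDAFVariance itself):

```java
public class VariancePartial {
    long count;
    double sum;
    double variance; // running sum of squared deviations, not yet divided

    /** Incremental update; numerically stabler than summing x and x*x. */
    void add(double x) {
        count++;
        sum += x;
        if (count > 1) {
            double t = count * x - sum;
            variance += (t * t) / ((double) count * (count - 1));
        }
    }

    double stddevPop() {
        return count == 0 ? 0 : Math.sqrt(variance / count);
    }

    public static void main(String[] args) {
        VariancePartial p = new VariancePartial();
        p.add(1.0);
        p.add(3.0);
        System.out.println(p.stddevPop()); // 1.0 for {1, 3}
    }
}
```

Because the (count, sum, variance) triple travels as one struct per group, the map side can still aggregate in vector mode; only the handoff to the row-mode reducer forces the non-vector output path.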
diff --git ql/src/test/results/clientpositive/vectorization_14.q.out ql/src/test/results/clientpositive/vectorization_14.q.out
index ef44f65..775c3ef 100644
--- ql/src/test/results/clientpositive/vectorization_14.q.out
+++ ql/src/test/results/clientpositive/vectorization_14.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION
SELECT ctimestamp1,
       cfloat,
       cstring1,
@@ -31,7 +31,7 @@ WHERE (((ctinyint <= cbigint)
GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble
ORDER BY cstring1, cfloat, cdouble, ctimestamp1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION
SELECT ctimestamp1,
       cfloat,
       cstring1,
@@ -64,6 +64,10 @@ WHERE (((ctinyint <= cbigint)
GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble
ORDER BY cstring1, cfloat, cdouble, ctimestamp1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -96,6 +100,18 @@ STAGE PLANS:
                    Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct)
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: false
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Group By Operator
          aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5)
@@ -123,6 +139,14 @@ STAGE PLANS:
              sort order: ++++
              Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double)
+     Map Vectorization:
+         enabled: false
+         enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+         inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey3 (type: timestamp), KEY.reducesinkkey1 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: float), VALUE._col6 (type: float), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double), VALUE._col16 (type: double), VALUE._col17 (type: double)
diff --git ql/src/test/results/clientpositive/vectorization_15.q.out ql/src/test/results/clientpositive/vectorization_15.q.out
index 5de2092..0195f2f 100644
--- ql/src/test/results/clientpositive/vectorization_15.q.out
+++ ql/src/test/results/clientpositive/vectorization_15.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION
SELECT cfloat,
       cboolean1,
       cdouble,
@@ -29,7 +29,7 @@ WHERE (((cstring2 LIKE '%ss%')
GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1
ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION
SELECT cfloat,
       cboolean1,
       cdouble,
@@ -60,6 +60,10 @@ WHERE (((cstring2 LIKE '%ss%')
GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1
ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -92,6 +96,18 @@ STAGE PLANS:
                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct)
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: false
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Group By Operator
          aggregations: stddev_samp(VALUE._col0), min(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), stddev_pop(VALUE._col5)
@@ -119,6 +135,14 @@ STAGE PLANS:
              sort order: +++++++
              Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double)
+     Map Vectorization:
+         enabled: false
+         enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+         inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double)
diff --git ql/src/test/results/clientpositive/vectorization_16.q.out ql/src/test/results/clientpositive/vectorization_16.q.out
index d93d810..2e3a34d 100644
--- ql/src/test/results/clientpositive/vectorization_16.q.out
+++ ql/src/test/results/clientpositive/vectorization_16.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION
SELECT cstring1,
       cdouble,
       ctimestamp1,
@@ -18,7 +18,7 @@ WHERE ((cstring2 LIKE '%b%')
       OR (cstring1 < 'a')))
GROUP BY cstring1, cdouble, ctimestamp1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION
SELECT cstring1,
       cdouble,
       ctimestamp1,
@@ -38,6 +38,10 @@ WHERE ((cstring2 LIKE '%b%')
       OR (cstring1 < 'a')))
GROUP BY cstring1, cdouble, ctimestamp1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -69,6 +73,18 @@ STAGE PLANS:
                    Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double)
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: false
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2)
diff --git ql/src/test/results/clientpositive/vectorization_17.q.out ql/src/test/results/clientpositive/vectorization_17.q.out
index 9014fe4..7590703 100644
--- ql/src/test/results/clientpositive/vectorization_17.q.out
+++ ql/src/test/results/clientpositive/vectorization_17.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION
SELECT cfloat,
       cstring1,
       cint,
@@ -22,7 +22,7 @@ WHERE (((cbigint > -23)
       OR (cfloat = cdouble))))
ORDER BY cbigint, cfloat
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION
SELECT cfloat,
       cstring1,
       cint,
@@ -46,6 +46,10 @@ WHERE (((cbigint > -23)
       OR (cfloat = cdouble))))
ORDER BY cbigint, cfloat
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -70,6 +74,18 @@ STAGE PLANS:
              Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double)
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(11,4)), VALUE._col11 (type: double)
diff --git ql/src/test/results/clientpositive/vectorization_7.q.out ql/src/test/results/clientpositive/vectorization_7.q.out
index 1875908..9080b75 100644
--- ql/src/test/results/clientpositive/vectorization_7.q.out
+++ ql/src/test/results/clientpositive/vectorization_7.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT cboolean1,
       cbigint,
       csmallint,
@@ -25,7 +25,7 @@ WHERE ((ctinyint != 0)
ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9
LIMIT 25
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT cboolean1,
       cbigint,
       csmallint,
@@ -52,6 +52,10 @@ WHERE ((ctinyint != 0)
ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9
LIMIT 25
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -63,19 +67,48 @@ STAGE PLANS:
          TableScan
            alias: alltypesorc
            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+           TableScan Vectorization:
+               native: true
+               projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
            Filter Operator
+             Filter Vectorization:
+                 className: VectorFilterOperator
+                 native: true
+                 predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val -15.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean
              predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean)
              Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22]
+                   selectExpressions: LongColAddLongColumn(col 3, col 3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long
                Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint)
                  sort order: +++++++++++++++
+                 Reduce Sink Vectorization:
+                     className: VectorReduceSinkOperator
+                     native: false
+                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false
                  Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE
                  TopN Hash Memory Usage: 0.1
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) @@ -179,7 +212,7 @@ NULL NULL -7196 -61 1969-12-31 15:59:44.823 NULL NULL 0 7196 61 78 NULL NULL 61 NULL NULL -7196 1 1969-12-31 15:59:48.361 NULL NULL 0 7196 -1 16 NULL NULL -1 0 NULL NULL -7196 14 1969-12-31 15:59:50.291 NULL NULL 0 7196 -14 3 NULL NULL -14 0 NULL NULL -7196 22 1969-12-31 15:59:52.699 NULL NULL 0 7196 -22 -5 NULL NULL -22 0 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, cbigint, csmallint, @@ -206,7 +239,7 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, cbigint, csmallint, @@ -233,6 +266,10 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -244,19 +281,48 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 7.6850000000000005)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % 
-257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22] + selectExpressions: LongColAddLongColumn(col 3, col 3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) diff --git ql/src/test/results/clientpositive/vectorization_8.q.out ql/src/test/results/clientpositive/vectorization_8.q.out index 1c46759..cc56a4e 100644 --- ql/src/test/results/clientpositive/vectorization_8.q.out +++ ql/src/test/results/clientpositive/vectorization_8.q.out @@ -1,4 +1,4 @@ 
-PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, @@ -23,7 +23,7 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, @@ -48,6 +48,10 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -59,19 +63,48 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 12, val 10.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 16.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -6432.0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, FilterDoubleColEqualDoubleScalar(col 5, val 988888.0) -> boolean) -> boolean) -> boolean predicate: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 10.0) and (UDFToDouble(ctimestamp2) <> 16.0)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 5, 10, 6, 4, 12, 13, 14, 16, 18, 15, 17, 19, 21] + selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -257.0) -> 14:double, DoubleColAddDoubleColumn(col 15, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 15:double) -> 16:double, DoubleColAddDoubleColumn(col 15, col 17)(children: DoubleColUnaryMinus(col 5) -> 15:double, CastLongToDouble(col 3) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 5) -> 15:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4) -> 17:double, DoubleColUnaryMinus(col 4) -> 19:double, DoubleColAddDoubleColumn(col 20, col 22)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 20:double, col 22) -> 21:double 
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) @@ -166,7 +199,7 @@ POSTHOOK: Input: default@alltypesorc 1969-12-31 15:59:43.783 -200.0 NULL NULL -11.0 200.0 -5438.15 51400.0 NULL 1.2116287E7 200.0 9.611 11.0 NULL 1969-12-31 15:59:43.807 -7196.0 NULL NULL 42.0 7196.0 1557.8500000000004 1849372.0 NULL -5.98226333E8 7196.0 -43.389 -42.0 NULL 1969-12-31 15:59:43.82 -7196.0 NULL NULL -30.0 7196.0 1557.8500000000004 1849372.0 NULL 1.329550715E9 7196.0 28.611 30.0 NULL -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, @@ -191,7 +224,7 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, @@ -216,6 +249,10 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -227,19 +264,48 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + 
className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 12, val 12.503)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 11.998)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -6432.0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, FilterDoubleColEqualDoubleScalar(col 5, val 988888.0) -> boolean) -> boolean) -> boolean predicate: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 12.503) and (UDFToDouble(ctimestamp2) <> 11.998)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 5, 10, 6, 4, 12, 13, 14, 16, 18, 15, 17, 19, 21] + selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -257.0) -> 14:double, DoubleColAddDoubleColumn(col 15, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 15:double) -> 16:double, DoubleColAddDoubleColumn(col 15, col 17)(children: DoubleColUnaryMinus(col 5) -> 15:double, CastLongToDouble(col 3) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 5) -> 15:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4) -> 17:double, DoubleColUnaryMinus(col 4) -> 19:double, DoubleColAddDoubleColumn(col 20, col 22)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 20:double, col 22) -> 21:double Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: 
+ enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) diff --git ql/src/test/results/clientpositive/vectorization_9.q.out ql/src/test/results/clientpositive/vectorization_9.q.out index d93d810..2e3a34d 100644 --- ql/src/test/results/clientpositive/vectorization_9.q.out +++ ql/src/test/results/clientpositive/vectorization_9.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -18,7 +18,7 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -38,6 +38,10 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -69,6 +73,18 @@ STAGE PLANS: Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) diff --git ql/src/test/results/clientpositive/vectorization_decimal_date.q.out ql/src/test/results/clientpositive/vectorization_decimal_date.q.out index 6cae52c..d2b56ef 100644 --- ql/src/test/results/clientpositive/vectorization_decimal_date.q.out +++ ql/src/test/results/clientpositive/vectorization_decimal_date.q.out @@ -12,10 +12,14 @@ POSTHOOK: Lineage: date_decimal_test.cdate EXPRESSION [(alltypesorc)alltypesorc. 
POSTHOOK: Lineage: date_decimal_test.cdecimal EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: date_decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: date_decimal_test.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -27,24 +31,49 @@ STAGE PLANS: TableScan alias: date_decimal_test Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: (cint is not null and cdouble is not null) (type: boolean) Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date), cdecimal (type: decimal(20,10)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3] Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 10 Data size: 1340 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 10 Data size: 1340 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vectorization_div0.q.out ql/src/test/results/clientpositive/vectorization_div0.q.out index db79fe7..910557a 100644 --- ql/src/test/results/clientpositive/vectorization_div0.q.out +++ ql/src/test/results/clientpositive/vectorization_div0.q.out @@ -1,9 +1,13 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select cdouble / 0.0 from alltypesorc limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization 
expression select cdouble / 0.0 from alltypesorc limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -15,21 +19,43 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: (cdouble / 0.0) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12] + selectExpressions: DoubleColDivideDoubleScalar(col 5, val 0.0) -> 12:double Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -145,14 +171,18 @@ NULL NULL NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -164,20 +194,49 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3, val 0) -> boolean, FilterLongColLessLongScalar(col 3, val 100000000) -> boolean) -> boolean predicate: ((cbigint > 0) and (cbigint < 100000000)) (type: boolean) Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21)) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: 
VectorSelectOperator + native: true + projectedOutputColumns: [12, 15, 17] + selectExpressions: LongColSubtractLongScalar(col 3, val 988888) -> 12:long, DoubleColDivideDoubleColumn(col 5, col 14)(children: CastLongToDouble(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 14:double) -> 15:double, DecimalScalarDivideDecimalColumn(val 1.2, col 16)(children: CastLongToDecimal(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 16:decimal(19,0)) -> 17:decimal(22,21) Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(22,21)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: decimal(22,21)) @@ -310,14 +369,18 @@ POSTHOOK: Input: default@alltypesorc 59347745 NULL 0.000000020219807846111 60229567 NULL 0.000000019923769334088 60330397 NULL 0.000000019890470801974 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -329,20 +392,49 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -500.0) -> boolean, FilterDoubleColLessDoubleScalar(col 5, val 
-199.0) -> boolean) -> boolean predicate: ((cdouble >= -500.0) and (cdouble < -199.0)) (type: boolean) Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double) outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 15, 16, 14, 17] + selectExpressions: DoubleColAddDoubleScalar(col 5, val 200.0) -> 12:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: CastLongToDouble(col 3) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 15:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 16:double, DoubleScalarDivideDoubleColumn(val 3.0, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 14:double, DoubleScalarDivideDoubleColumn(val 1.2, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 17:double Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double) diff --git ql/src/test/results/clientpositive/vectorization_limit.q.out ql/src/test/results/clientpositive/vectorization_limit.q.out index 39ea939..671f00c 100644 --- ql/src/test/results/clientpositive/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/vectorization_limit.q.out @@ -1,8 +1,12 @@ WARNING: Comparing a bigint and a double may result in a loss of precision. 
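A note on the vectorization_div0 updates above: the vectorized division expressions (DoubleColDivideDoubleScalar, DoubleScalarDivideDoubleColumn) return NULL for double division by zero rather than raising an error, which is why the result block is a run of NULLs. A minimal sketch of how these plans are produced, assuming the standard alltypesorc test table and the MR engine these clientpositive golden files run on:

SET hive.vectorized.execution.enabled=true;
-- The bare form prints only the PLAN VECTORIZATION and Map/Reduce Vectorization summaries;
-- the EXPRESSION form additionally prints per-operator predicateExpression/selectExpressions.
EXPLAIN VECTORIZATION EXPRESSION
SELECT cdouble / 0.0 FROM alltypesorc LIMIT 100;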
-PREHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -32,6 +36,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -55,12 +67,16 @@ POSTHOOK: Input: default@alltypesorc -1887561756 -8881.0 -1887561756 -2281.0 -1887561756 9531.0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -72,20 +88,48 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: ctinyint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1] Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 value expressions: _col2 (type: smallint) Execution mode: vectorized + Map Vectorization: + 
enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint) @@ -136,12 +180,16 @@ POSTHOOK: Input: default@alltypesorc -64 -2919.0 -2919 -64 -1600.0 -1600 -64 -200.0 -200 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -153,12 +201,28 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint), (cdouble + 1.0) (type: double) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 12] + selectExpressions: DoubleColAddDoubleScalar(col 5, val 1.0) -> 12:double Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(_col1) + Group By Vectorization: + aggregators: VectorUDAFAvgDouble(col 12) -> struct + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDouble(col 12) -> struct output type STRUCT requires PRIMITIVE IS false keys: _col0 (type: tinyint) mode: hash outputColumnNames: _col0, _col1 @@ -171,9 +235,25 @@ STAGE PLANS: TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 @@ -223,12 +303,16 @@ NULL 9370.0945309795 -48 1672.909090909091 -47 -574.6428571428571 -46 3033.55 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select distinct(ctinyint) from alltypesorc limit 20 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select 
distinct(ctinyint) from alltypesorc limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -240,11 +324,24 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: ctinyint (type: tinyint) mode: hash outputColumnNames: _col0 @@ -253,11 +350,32 @@ STAGE PLANS: key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 @@ -307,12 +425,16 @@ NULL -48 -47 -46 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -324,12 +446,26 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double) outputColumnNames: ctinyint, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5] Statistics: Num rows: 12288 
Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT cdouble) + Group By Vectorization: + aggregators: VectorUDAFCount(col 5) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 5 + native: false + projectedOutputColumns: [0] keys: ctinyint (type: tinyint), cdouble (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -338,12 +474,33 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false, No DISTINCT columns IS false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 @@ -393,12 +550,16 @@ NULL 2932 -48 29 -47 22 -46 24 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -417,12 +578,16 @@ POSTHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limi POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -435,15 +600,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + 
Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: ctinyint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double), ctinyint (type: tinyint) outputColumnNames: cdouble, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 0] Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 5 + native: false + projectedOutputColumns: [0] keys: cdouble (type: double) mode: hash outputColumnNames: _col0, _col1 @@ -452,12 +635,33 @@ STAGE PLANS: key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 @@ -478,6 +682,14 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint) diff --git ql/src/test/results/clientpositive/vectorization_offset_limit.q.out ql/src/test/results/clientpositive/vectorization_offset_limit.q.out index 7f41283..d8a8980 100644 --- ql/src/test/results/clientpositive/vectorization_offset_limit.q.out +++ ql/src/test/results/clientpositive/vectorization_offset_limit.q.out @@ -1,8 +1,12 @@ WARNING: Comparing a bigint and a double may result in a loss of precision. 
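Every file touched here shows the same reduce-side outcome: enabled: false, with hive.vectorized.execution.reduce.enabled satisfied but hive.execution.engine mr IN [tez, spark] failing, since these golden files run on MR. A hedged sketch of the two knobs as they apply to the offset-limit query that follows (setting names copied from the output above):

-- Both conditions are echoed back in the Reduce Vectorization block:
SET hive.vectorized.execution.reduce.enabled=true;  -- reported under enableConditionsMet
SET hive.execution.engine=mr;                       -- fails the "mr IN [tez, spark]" check (enableConditionsNotMet)
EXPLAIN VECTORIZATION
SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble AND cint > 0 LIMIT 3,2;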
-PREHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2 +PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2 PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2 +POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -33,6 +37,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -51,12 +63,16 @@ POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -1887561756 10361.0 -1887561756 -8881.0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -68,20 +84,48 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: ctinyint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1] Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: smallint) Execution mode: 
vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint) diff --git ql/src/test/results/clientpositive/vectorization_part_project.q.out ql/src/test/results/clientpositive/vectorization_part_project.q.out index 5463c36..49e0b56 100644 --- ql/src/test/results/clientpositive/vectorization_part_project.q.out +++ ql/src/test/results/clientpositive/vectorization_part_project.q.out @@ -46,10 +46,14 @@ POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cstring2 SIMPLE [(alltype POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] -PREHOOK: query: explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 +PREHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 +POSTHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -71,6 +75,18 @@ STAGE PLANS: Statistics: Num rows: 200 Data size: 54496 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double) diff --git ql/src/test/results/clientpositive/vectorization_pushdown.q.out ql/src/test/results/clientpositive/vectorization_pushdown.q.out index 04780c4..664a9ba 100644 --- ql/src/test/results/clientpositive/vectorization_pushdown.q.out +++ ql/src/test/results/clientpositive/vectorization_pushdown.q.out @@ -1,8 +1,12 @@ WARNING: Comparing a bigint and a double may result in a loss of precision. 
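The Map Vectorization and Reduce Sink Vectorization summaries throughout these plans enumerate preconditions as nativeConditionsMet / nativeConditionsNotMet (and enableConditionsMet / enableConditionsNotMet on the reduce side). One plausible way to read them is as a checklist in which each precondition is evaluated once and sorted into one of two lists; the sketch below uses hypothetical class and method names purely to mirror that output format, and is not the Vectorizer's actual implementation:

    import java.util.ArrayList;
    import java.util.List;

    // Hypothetical helper illustrating the "… IS true / … IS false" lists.
    public class NativeConditionChecklist {
      private final List<String> met = new ArrayList<>();
      private final List<String> notMet = new ArrayList<>();

      // Record one precondition under the description used in the plan text.
      public void check(String description, boolean satisfied) {
        if (satisfied) {
          met.add(description + " IS true");
        } else {
          notMet.add(description + " IS false");
        }
      }

      // Corresponds to the operator's native: true/false flag.
      public boolean allMet() {
        return notMet.isEmpty();
      }

      public List<String> met() { return met; }
      public List<String> notMet() { return notMet; }
    }

Fed with calls such as check("hive.vectorized.execution.reducesink.new.enabled", newEnabled) and check("No TopN", !hasTopN), the two lists reproduce the condition strings seen in these plans, and allMet() matches whether the operator is reported as native.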
-PREHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble +PREHOOK: query: explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble +POSTHOOK: query: explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -32,6 +36,18 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0) diff --git ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out index 616599b..4123c7b 100644 --- ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out @@ -94,12 +94,16 @@ POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@vsmb_bucket_txt POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -111,7 +115,14 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator @@ -121,16 +132,34 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] File Output Operator 
compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -152,12 +181,16 @@ POSTHOOK: Input: default@vsmb_bucket_2 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -169,7 +202,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator @@ -179,16 +219,34 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -210,12 +268,16 @@ POSTHOOK: Input: default@vsmb_bucket_rc 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select /*+MAPJOIN(b)*/ 
* from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -227,7 +289,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator @@ -237,16 +306,34 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vectorized_case.q.out ql/src/test/results/clientpositive/vectorized_case.q.out index 08c1412..20c58ba 100644 --- ql/src/test/results/clientpositive/vectorized_case.q.out +++ ql/src/test/results/clientpositive/vectorized_case.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select csmallint, case @@ -16,7 +16,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select csmallint, case @@ -34,6 +34,10 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -45,21 +49,44 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column 
stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 14, 15] + selectExpressions: VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:string, VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 15:string Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -109,7 +136,7 @@ POSTHOOK: Input: default@alltypesorc 10583 c c 418 a a 12205 b b -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select csmallint, case @@ -127,7 +154,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select csmallint, case @@ -145,6 +172,10 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -156,21 +187,44 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + 
className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 14, 15] + selectExpressions: VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:string, VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 15:string Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vectorized_casts.q.out ql/src/test/results/clientpositive/vectorized_casts.q.out index 9428cf3..e55ed4f 100644 --- ql/src/test/results/clientpositive/vectorized_casts.q.out +++ ql/src/test/results/clientpositive/vectorized_casts.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization select cast (ctinyint as boolean) @@ -72,7 +72,7 @@ from alltypesorc where cbigint % 250 = 0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select cast (ctinyint as boolean) @@ -146,6 +146,10 @@ from alltypesorc where cbigint % 250 = 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -172,6 +176,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vectorized_context.q.out ql/src/test/results/clientpositive/vectorized_context.q.out index 913d07c..dac9b6b 100644 --- ql/src/test/results/clientpositive/vectorized_context.q.out +++ ql/src/test/results/clientpositive/vectorized_context.q.out @@ -82,20 +82,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@household_demographics POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization select store.s_city, ss_net_profit from store_sales JOIN store ON store_sales.ss_store_sk = store.s_store_sk JOIN household_demographics ON store_sales.ss_hdemo_sk = 
household_demographics.hd_demo_sk limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select store.s_city, ss_net_profit from store_sales JOIN store ON store_sales.ss_store_sk = store.s_store_sk JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-7 is a root stage Stage-5 depends on stages: Stage-7 @@ -187,6 +191,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/vectorized_date_funcs.q.out index 748580f..1412ccc 100644 --- ql/src/test/results/clientpositive/vectorized_date_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_date_funcs.q.out @@ -197,7 +197,7 @@ date_udf_flight_orc.fl_date date_udf_flight_orc.fl_time 2010-10-31 2010-10-31 07:00:00 2010-10-31 2010-10-31 07:00:00 2010-10-31 2010-10-31 07:00:00 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_time, to_unix_timestamp(fl_time), year(fl_time), @@ -220,7 +220,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(fl_time, timestamp "2007-03-14 08:21:59") FROM date_udf_flight_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_time, to_unix_timestamp(fl_time), year(fl_time), @@ -244,6 +244,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -266,7 +270,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: exception: java.lang.NullPointerException stack trace: java.lang.String.(String.java:515), org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateDiffColScalar.vectorExpressionParameters(VectorUDFDateDiffColScalar.java:304), org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression.toString(VectorExpression.java:163), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpression(VectorizationContext.java:601), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateExprNodeDesc(Vectorizer.java:2124), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateExprNodeDesc(Vectorizer.java:2114), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateSelectOperator(Vectorizer.java:1834), 
org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateMapWorkOperator(Vectorizer.java:1665), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$MapWorkValidationNodeProcessor.process(Vectorizer.java:1295), org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90), org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105), org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89), org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.walk(DefaultGraphWalker.java:158), org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:120), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateMapWork(Vectorizer.java:1074), ... + vectorized: false Stage: Stage-0 Fetch Operator @@ -462,7 +471,7 @@ fl_time _c1 _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 _c10 _c11 _c12 _c13 _c14 _c15 _c16 _ 2010-10-31 07:00:00 1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 3956 3956 3956 1327 1327 1327 1327 2010-10-31 07:00:00 1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 3956 3956 3956 1327 1327 1327 1327 2010-10-31 07:00:00 1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 3956 3956 3956 1327 1327 1327 1327 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_date, to_unix_timestamp(fl_date), year(fl_date), @@ -485,7 +494,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(fl_date, timestamp "2007-03-14 08:21:59") FROM date_udf_flight_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_date, to_unix_timestamp(fl_date), year(fl_date), @@ -509,6 +518,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -531,7 +544,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: exception: java.lang.NullPointerException stack trace: java.lang.String.(String.java:515), org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateDiffColScalar.vectorExpressionParameters(VectorUDFDateDiffColScalar.java:304), org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression.toString(VectorExpression.java:163), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpression(VectorizationContext.java:601), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateExprNodeDesc(Vectorizer.java:2124), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateExprNodeDesc(Vectorizer.java:2114), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateSelectOperator(Vectorizer.java:1834), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateMapWorkOperator(Vectorizer.java:1665), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$MapWorkValidationNodeProcessor.process(Vectorizer.java:1295), 
org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90), org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105), org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89), org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.walk(DefaultGraphWalker.java:158), org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:120), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateMapWork(Vectorizer.java:1074), ... + vectorized: false Stage: Stage-0 Fetch Operator @@ -727,7 +745,7 @@ fl_date _c1 _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 _c10 _c11 _c12 _c13 _c14 _c15 _c16 _ 2010-10-31 1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 3956 3956 3956 1327 1327 1327 1327 2010-10-31 1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 3956 3956 3956 1327 1327 1327 1327 2010-10-31 1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 3956 3956 3956 1327 1327 1327 1327 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_time, fl_date, year(fl_time) = year(fl_date), @@ -752,7 +770,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(fl_date, "2007-03-14") = datediff(fl_date, date "2007-03-14") FROM date_udf_flight_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_time, fl_date, year(fl_time) = year(fl_date), @@ -778,6 +796,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -800,7 +822,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: java.lang.NullPointerException + vectorized: false Stage: Stage-0 Fetch Operator @@ -1000,7 +1027,7 @@ fl_time fl_date _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 _c10 _c11 _c12 _c13 _c14 _c15 _c 2010-10-31 07:00:00 2010-10-31 true true true true true true true true true true true true true true true true true true true true 2010-10-31 07:00:00 2010-10-31 true true true true true true true true true true true true true true true true true true true true 2010-10-31 07:00:00 2010-10-31 true true true true true true true true true true true true true true true true true true true true -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_date, to_date(date_add(fl_date, 2)), to_date(date_sub(fl_date, 2)), @@ -1009,7 +1036,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) FROM date_udf_flight_orc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_date, to_date(date_add(fl_date, 2)), to_date(date_sub(fl_date, 2)), @@ -1019,6 +1046,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc LIMIT 10 POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: 
true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1030,21 +1061,43 @@ STAGE PLANS: TableScan alias: date_udf_flight_orc Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: fl_date (type: date), to_date(date_add(fl_date, 2)) (type: date), to_date(date_sub(fl_date, 2)) (type: date), datediff(fl_date, date_add(fl_date, 2)) (type: int), datediff(fl_date, date_sub(fl_date, 2)) (type: int), datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3, 4, 5, 6, 8] + selectExpressions: VectorUDFDateLong(col 2)(children: VectorUDFDateAddColScalar(col 0, val 2) -> 2:date) -> 3:date, VectorUDFDateLong(col 2)(children: VectorUDFDateSubColScalar(col 0, val 2) -> 2:date) -> 4:date, VectorUDFDateDiffColCol(col 0, col 2)(children: VectorUDFDateAddColScalar(col 0, val 2) -> 2:date) -> 5:long, VectorUDFDateDiffColCol(col 0, col 2)(children: VectorUDFDateSubColScalar(col 0, val 2) -> 2:date) -> 6:long, VectorUDFDateDiffColCol(col 2, col 7)(children: VectorUDFDateAddColScalar(col 0, val 2) -> 2:date, VectorUDFDateSubColScalar(col 0, val 2) -> 7:date) -> 8:long Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 10 Data size: 960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 10 Data size: 960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1095,7 +1148,7 @@ POSTHOOK: Input: default@date_udf_flight_orc #### A masked pattern was here #### _c0 2009-07-30 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT min(fl_date) AS c1, max(fl_date), count(fl_date), @@ -1103,7 +1156,7 @@ PREHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT min(fl_date) AS c1, max(fl_date), count(fl_date), @@ -1112,6 +1165,10 @@ FROM date_udf_flight_orc ORDER BY c1 POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1124,23 +1181,57 @@ STAGE PLANS: TableScan alias: date_udf_flight_orc Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: fl_date (type: date) outputColumnNames: 
fl_date + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(fl_date), max(fl_date), count(fl_date), count() + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> date, VectorUDAFMaxLong(col 0) -> date, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE @@ -1160,6 +1251,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) diff --git ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out index f8ae962..81292ec 100644 --- ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out +++ ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out @@ -16,10 +16,14 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dtest POSTHOOK: Lineage: dtest.a SCRIPT [] POSTHOOK: Lineage: dtest.b SIMPLE [] -PREHOOK: query: explain select sum(distinct a), count(distinct a) from dtest +PREHOOK: query: explain vectorization 
select sum(distinct a), count(distinct a) from dtest PREHOOK: type: QUERY -POSTHOOK: query: explain select sum(distinct a), count(distinct a) from dtest +POSTHOOK: query: explain vectorization select sum(distinct a), count(distinct a) from dtest POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -47,6 +51,18 @@ STAGE PLANS: sort order: + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0) @@ -76,10 +92,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dtest #### A masked pattern was here #### 300 1 -PREHOOK: query: explain select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +PREHOOK: query: explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +POSTHOOK: query: explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -106,6 +126,18 @@ STAGE PLANS: sort order: + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), avg(DISTINCT KEY._col0:2._col0), std(DISTINCT KEY._col0:3._col0) diff --git ql/src/test/results/clientpositive/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/vectorized_mapjoin.q.out index a11e058..32210ad 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin.q.out @@ -1,11 +1,15 @@ -PREHOOK: query: EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT 
COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -41,12 +45,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -54,14 +69,31 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1 Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + selectExpressions: LongColAddLongColumn(col 0, col 1) -> 2:long Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0) -> bigint, VectorUDAFMaxLong(col 1) -> int, VectorUDAFMinLong(col 0) -> int, VectorUDAFAvgLong(col 2) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE @@ -70,11 +102,27 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out index 148bca0..fee0f54 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out @@ -28,12 +28,16 @@ POSTHOOK: query: insert into y values(1) POSTHOOK: type: QUERY POSTHOOK: Output: default@y POSTHOOK: Lineage: y.b EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(1) from x, y where a = b PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(1) from x, y where a = b POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -69,12 +73,23 @@ STAGE PLANS: TableScan alias: y Statistics: Num rows: 45 Data size: 181 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: b is not null (type: boolean) Statistics: Num rows: 45 Data size: 181 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: b (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 45 Data size: 181 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -82,22 +97,54 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Fast Hash Table and No Hybrid Hash Join IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 49 Data size: 199 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 0:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for 
values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/vectorized_math_funcs.q.out ql/src/test/results/clientpositive/vectorized_math_funcs.q.out index 9616087..d276921 100644 --- ql/src/test/results/clientpositive/vectorized_math_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_math_funcs.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select cdouble ,Round(cdouble, 2) @@ -50,7 +50,7 @@ where cbigint % 500 = 0 and sin(cfloat) >= -1.0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select cdouble ,Round(cdouble, 2) @@ -102,6 +102,10 @@ where cbigint % 500 = 0 and sin(cfloat) >= -1.0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -113,21 +117,44 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 3, val 500) -> 12:long) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 13, val -1.0)(children: FuncSinDoubleToDouble(col 4) -> 13:double) -> boolean) -> boolean predicate: (((cbigint % 500) = 0) and (sin(cfloat) >= -1.0)) (type: boolean) Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double), round(cdouble, 2) (type: double), floor(cdouble) (type: bigint), ceil(cdouble) (type: bigint), rand() (type: double), rand(98007) (type: double), exp(ln(cdouble)) (type: double), ln(cdouble) (type: double), ln(cfloat) (type: double), log10(cdouble) (type: double), log2(cdouble) (type: double), log2((cdouble - 15601.0)) (type: double), log2(cfloat) (type: double), log2(cbigint) (type: double), log2(cint) (type: double), log2(csmallint) (type: double), log2(ctinyint) (type: double), log(2, cdouble) (type: double), power(log2(cdouble), 2) (type: double), power(log2(cdouble), 2) (type: double), sqrt(cdouble) (type: double), sqrt(cbigint) (type: double), bin(cbigint) (type: string), hex(cdouble) (type: string), conv(cbigint, 10, 16) 
(type: string), abs(cdouble) (type: double), abs(ctinyint) (type: int), (cint pmod 3) (type: int), sin(cdouble) (type: double), asin(cdouble) (type: double), cos(cdouble) (type: double), acos(cdouble) (type: double), atan(cdouble) (type: double), degrees(cdouble) (type: double), radians(cdouble) (type: double), cdouble (type: double), cbigint (type: bigint), (- cdouble) (type: double), sign(cdouble) (type: double), sign(cbigint) (type: double), cos(((- sin(log(cdouble))) + 3.14159)) (type: double)
              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [5, 13, 12, 14, 15, 16, 18, 17, 19, 20, 21, 23, 22, 24, 25, 26, 27, 28, 30, 31, 29, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 5, 3, 46, 47, 48, 49]
+                  selectExpressions: RoundWithNumDigitsDoubleToDouble(col 5, decimalPlaces 2) -> 13:double, FuncFloorDoubleToLong(col 5) -> 12:long, FuncCeilDoubleToLong(col 5) -> 14:long, FuncRandNoSeed -> 15:double, FuncRand -> 16:double, FuncExpDoubleToDouble(col 17)(children: FuncLnDoubleToDouble(col 5) -> 17:double) -> 18:double, FuncLnDoubleToDouble(col 5) -> 17:double, FuncLnDoubleToDouble(col 4) -> 19:double, FuncLog10DoubleToDouble(col 5) -> 20:double, FuncLog2DoubleToDouble(col 5) -> 21:double, FuncLog2DoubleToDouble(col 22)(children: DoubleColSubtractDoubleScalar(col 5, val 15601.0) -> 22:double) -> 23:double, FuncLog2DoubleToDouble(col 4) -> 22:double, FuncLog2LongToDouble(col 3) -> 24:double, FuncLog2LongToDouble(col 2) -> 25:double, FuncLog2LongToDouble(col 1) -> 26:double, FuncLog2LongToDouble(col 0) -> 27:double, VectorUDFAdaptor(log(2, cdouble)) -> 28:double, VectorUDFAdaptor(power(log2(cdouble), 2))(children: FuncLog2DoubleToDouble(col 5) -> 29:double) -> 30:double, VectorUDFAdaptor(power(log2(cdouble), 2))(children: FuncLog2DoubleToDouble(col 5) -> 29:double) -> 31:double, FuncSqrtDoubleToDouble(col 5) -> 29:double, FuncSqrtLongToDouble(col 3) -> 32:double, FuncBin(col 3) -> 33:String, VectorUDFAdaptor(hex(cdouble)) -> 34:string, VectorUDFAdaptor(conv(cbigint, 10, 16)) -> 35:string, FuncAbsDoubleToDouble(col 5) -> 36:double, FuncAbsLongToLong(col 0) -> 37:long, PosModLongToLong(col 2, divisor 3) -> 38:long, FuncSinDoubleToDouble(col 5) -> 39:double, FuncASinDoubleToDouble(col 5) -> 40:double, FuncCosDoubleToDouble(col 5) -> 41:double, FuncACosDoubleToDouble(col 5) -> 42:double, FuncATanDoubleToDouble(col 5) -> 43:double, FuncDegreesDoubleToDouble(col 5) -> 44:double, FuncRadiansDoubleToDouble(col 5) -> 45:double, DoubleColUnaryMinus(col 5) -> 46:double, FuncSignDoubleToDouble(col 5) -> 47:double, FuncSignLongToDouble(col 3) -> 48:double, FuncCosDoubleToDouble(col 50)(children: DoubleColAddDoubleScalar(col 49, val 3.14159)(children: DoubleColUnaryMinus(col 50)(children: FuncSinDoubleToDouble(col 49)(children: FuncLnDoubleToDouble(col 5) -> 49:double) -> 50:double) -> 49:double) -> 50:double) -> 49:double
              Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
              File Output Operator
                compressed: false
+                File Sink Vectorization:
+                    className: VectorFileSinkOperator
+                    native: false
                Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
                table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: true
+          vectorized: true
 
   Stage: Stage-0
     Fetch Operator
diff --git ql/src/test/results/clientpositive/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/vectorized_parquet_types.q.out
index 9d6bf0c..cd16b3b 100644
--- ql/src/test/results/clientpositive/vectorized_parquet_types.q.out
+++ ql/src/test/results/clientpositive/vectorized_parquet_types.q.out
@@ -115,14 +115,18 @@ POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet
 POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: parquet_types.cvarchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cvarchar, type:varchar(10), comment:null), ]
 POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -134,18 +138,37 @@ STAGE PLANS:
           TableScan
             alias: parquet_types
             Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
             Select Operator
               expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), t (type: timestamp), cchar (type: char(5)), cvarchar (type: varchar(10)), hex(cbinary) (type: string), cdecimal (type: decimal(4,2))
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 10]
+                  selectExpressions: VectorUDFAdaptor(hex(cbinary)) -> 11:string
               Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
+                File Sink Vectorization:
+                    className: VectorFileSinkOperator
+                    native: false
                 Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE
                 table:
                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: true
+          vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -185,12 +208,16 @@ POSTHOOK: Input: default@parquet_types
 119 2 5 1.4 5.7 fgh 2030-08-08 20:20:20.202020202 vwxyz abcdede 68692CCAC0BDE7 12.83
 120 3 1 1.0 6.0 ijk 2031-09-09 21:21:21.212121212 wxyza abcde B4F3CAFDBEDD 73.04
 121 1 2 1.1 6.3 lmn 2032-10-10 22:22:22.222222222 bcdef abcde  90.33
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -202,18 +229,37 @@ STAGE PLANS:
           TableScan
             alias: parquet_types
             Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
             Select Operator
               expressions: cchar (type: char(5)), length(cchar) (type: int), cvarchar (type: varchar(10)), length(cvarchar) (type: int), cdecimal (type: decimal(4,2)), sign(cdecimal) (type: int)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [7, 11, 8, 12, 10, 13]
+                  selectExpressions: StringLength(col 7) -> 11:Long, StringLength(col 8) -> 12:Long, FuncSignDecimalToLong(col 10) -> 13:int
               Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
+                File Sink Vectorization:
+                    className: VectorFileSinkOperator
+                    native: false
                 Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE
                 table:
                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -251,7 +297,7 @@ uvwzy 5 abcdede 7 4.76 1
 vwxyz 5 abcdede 7 12.83 1
 wxyza 5 abcde 5 73.04 1
 bcdef 5 abcde 5 90.33 1
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 SELECT ctinyint,
   MAX(cint),
   MIN(csmallint),
@@ -263,7 +309,7 @@ FROM parquet_types
 GROUP BY ctinyint
 ORDER BY ctinyint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 SELECT ctinyint,
   MAX(cint),
   MIN(csmallint),
@@ -275,6 +321,10 @@ FROM parquet_types
 GROUP BY ctinyint
 ORDER BY ctinyint
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -287,12 +337,27 @@ STAGE PLANS:
           TableScan
             alias: parquet_types
             Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
             Select Operator
               expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), cdecimal (type: decimal(4,2))
               outputColumnNames: ctinyint, cint, csmallint, cstring1, cfloat, cdouble, cdecimal
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [1, 0, 2, 5, 3, 4, 10]
              Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble), max(cdecimal)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMaxLong(col 0) -> int, VectorUDAFMinLong(col 2) -> smallint, VectorUDAFCount(col 5) -> bigint, VectorUDAFAvgDouble(col 3) -> struct, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFMaxDecimal(col 10) -> decimal(4,2)
+                    className: VectorGroupByOperator
+                    vectorOutput: false
+                    keyExpressions: col 1
+                    native: false
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                    vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDouble(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false
                keys: ctinyint (type: tinyint)
                mode: hash
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -304,9 +369,25 @@ STAGE PLANS:
                  Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct), _col6 (type: decimal(4,2))
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: false
+          inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4), max(VALUE._col5)
+          Group By Vectorization:
+              vectorOutput: false
+              native: false
+              projectedOutputColumns: null
           keys: KEY._col0 (type: tinyint)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -327,6 +408,14 @@ STAGE PLANS:
               sort order: +
               Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(4,2))
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: int), VALUE._col1 (type: smallint), VALUE._col2 (type: bigint), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: decimal(4,2))
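The parquet plans above draw a distinction worth spelling out: an operator or expression is reported as "native" only when it works directly on the column arrays of a VectorizedRowBatch, while something like VectorUDFAdaptor(hex(cbinary)) wraps the row-mode UDF, which is why that file's first summary flips usesVectorUDFAdaptor to true. The following self-contained sketch is illustrative only (the class name and the add-a-scalar operation are invented, and it assumes Hive's vector classes on the classpath); it shows the batch-at-a-time loop shape that native expressions use:

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

    public class AddScalarSketch {
      // Adds a scalar to a long column, batch at a time, writing into another column.
      public static void addScalar(VectorizedRowBatch batch, int inCol, int outCol, long scalar) {
        LongColumnVector in = (LongColumnVector) batch.cols[inCol];
        LongColumnVector out = (LongColumnVector) batch.cols[outCol];
        int[] sel = batch.selected;
        int n = batch.size;
        if (n == 0) {
          return; // nothing to do for an empty batch
        }
        out.isRepeating = in.isRepeating;
        out.noNulls = in.noNulls;
        if (in.isRepeating) {
          // One add covers the whole batch.
          out.vector[0] = in.vector[0] + scalar;
          out.isNull[0] = in.isNull[0];
        } else if (batch.selectedInUse) {
          // Only the rows surviving earlier filters.
          for (int j = 0; j < n; j++) {
            int i = sel[j];
            out.vector[i] = in.vector[i] + scalar;
            out.isNull[i] = in.isNull[i];
          }
        } else {
          // Tight loop with no per-row dispatch.
          for (int i = 0; i < n; i++) {
            out.vector[i] = in.vector[i] + scalar;
            out.isNull[i] = in.isNull[i];
          }
        }
      }
    }

The isRepeating and selectedInUse fast paths are exactly what the adaptor path gives up, which is presumably why the new summaries call the adaptor out as a separate flag.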
diff --git ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out
index 992a1ef..3a7bbbb 100644
--- ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out
+++ ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out
@@ -1,11 +1,15 @@
-PREHOOK: query: EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint)
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint)
 FROM alltypesorc t1
 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint)
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint)
 FROM alltypesorc t1
 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -46,6 +50,13 @@ STAGE PLANS:
               sort order: +
               Map-reduce partition columns: _col0 (type: int)
               Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -61,6 +72,10 @@ STAGE PLANS:
             Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
             Group By Operator
               aggregations: count(_col0), max(_col1), min(_col0), avg(_col2)
+              Group By Vectorization:
+                  vectorOutput: false
+                  native: false
+                  projectedOutputColumns: null
               mode: hash
               outputColumnNames: _col0, _col1, _col2, _col3
               Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
@@ -79,9 +94,21 @@ STAGE PLANS:
               sort order:
               Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct)
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3)
+          Group By Vectorization:
+              vectorOutput: false
+              native: false
+              projectedOutputColumns: null
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2, _col3
           Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
@@ -101,6 +128,14 @@ STAGE PLANS:
               sort order: +
               Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double)
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: double)
diff --git ql/src/test/results/clientpositive/vectorized_string_funcs.q.out ql/src/test/results/clientpositive/vectorized_string_funcs.q.out
index 7b34452..5ddae43 100644
--- ql/src/test/results/clientpositive/vectorized_string_funcs.q.out
+++ ql/src/test/results/clientpositive/vectorized_string_funcs.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization
 select
    substr(cstring1, 1, 2)
   ,substr(cstring1, 2)
@@ -20,7 +20,7 @@ where cbigint % 237 = 0
   and length(substr(cstring1, 1, 2)) <= 2
   and cstring1 like '%'
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization
 select
    substr(cstring1, 1, 2)
   ,substr(cstring1, 2)
@@ -42,6 +42,10 @@ where cbigint % 237 = 0
   and length(substr(cstring1, 1, 2)) <= 2
   and cstring1 like '%'
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -68,6 +72,14 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
 
   Stage: Stage-0
     Fetch Operator
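Note that vectorized_string_funcs.q.out uses the plain `explain vectorization` form, so its hunks gain only the per-vertex summary blocks, whereas files using `explain vectorization expression` also gain the Select Vectorization / selectExpressions detail. A hedged usage sketch of fetching the new output over JDBC (the connection URL is a placeholder, and a running HiveServer2 with the Hive JDBC driver on the classpath is assumed; neither is part of this patch):

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class ExplainVectorizationDemo {
      public static void main(String[] args) throws Exception {
        try (Connection conn =
                 DriverManager.getConnection("jdbc:hive2://localhost:10000/default");
             Statement stmt = conn.createStatement()) {
          // The gate the PLAN VECTORIZATION block reports on.
          stmt.execute("SET hive.vectorized.execution.enabled=true");
          // Same form as the queries in these .q.out files.
          try (ResultSet rs = stmt.executeQuery(
              "EXPLAIN VECTORIZATION SELECT substr(cstring1, 1, 2) FROM alltypesorc")) {
            while (rs.next()) {
              System.out.println(rs.getString(1)); // one plan line per row
            }
          }
        }
      }
    }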
diff --git ql/src/test/results/clientpositive/vectorized_timestamp.q.out ql/src/test/results/clientpositive/vectorized_timestamp.q.out
index 6880b7a..c74fa63 100644
--- ql/src/test/results/clientpositive/vectorized_timestamp.q.out
+++ ql/src/test/results/clientpositive/vectorized_timestamp.q.out
@@ -17,12 +17,16 @@ POSTHOOK: query: INSERT INTO TABLE test VALUES ('0001-01-01 00:00:00.000000000')
 POSTHOOK: type: QUERY
 POSTHOOK: Output: default@test
 POSTHOOK: Lineage: test.ts EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -62,12 +66,16 @@ POSTHOOK: Input: default@test
 #### A masked pattern was here ####
 0001-01-01 00:00:00
 9999-12-31 23:59:59.999999999
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -85,6 +93,10 @@ STAGE PLANS:
             Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
             Group By Operator
               aggregations: min(ts), max(ts)
+              Group By Vectorization:
+                  vectorOutput: false
+                  native: false
+                  projectedOutputColumns: null
               mode: hash
               outputColumnNames: _col0, _col1
               Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
@@ -95,6 +107,10 @@ STAGE PLANS:
       Reduce Operator Tree:
         Group By Operator
           aggregations: min(VALUE._col0), max(VALUE._col1)
+          Group By Vectorization:
+              vectorOutput: false
+              native: false
+              projectedOutputColumns: null
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
@@ -125,12 +141,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test
 #### A masked pattern was here ####
 0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 3652060 23:59:59.999999999
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -142,18 +162,36 @@ STAGE PLANS:
           TableScan
             alias: test
             Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
             Select Operator
               expressions: ts (type: timestamp)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [0]
               Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
+                File Sink Vectorization:
+                    className: VectorFileSinkOperator
+                    native: false
                 Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                 table:
                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -171,12 +209,16 @@ POSTHOOK: Input: default@test
 #### A masked pattern was here ####
 0001-01-01 00:00:00
 9999-12-31 23:59:59.999999999
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -188,23 +230,57 @@ STAGE PLANS:
           TableScan
             alias: test
             Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
             Select Operator
               expressions: ts (type: timestamp)
               outputColumnNames: ts
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [0]
               Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: min(ts), max(ts)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 0) -> timestamp
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0, 1]
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   sort order:
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                   Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: timestamp), _col1 (type: timestamp)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: min(VALUE._col0), max(VALUE._col1)
+          Group By Vectorization:
+              vectorOutput: false
+              native: false
+              projectedOutputColumns: null
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
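The vectorized_timestamp.q.out file above shows the same two queries explained once with vectorization off and once with it on, and throughout these plans the on/off decisions are reported as paired lists (enableConditionsMet / enableConditionsNotMet), e.g. reduce-side vectorization staying disabled on MR because hive.execution.engine is not in [tez, spark]. A hypothetical illustration of that reporting convention follows; no such helper class exists in Hive or in this patch, it only mimics the output format:

    import java.util.ArrayList;
    import java.util.List;

    public class VectorizationConditions {
      private final List<String> met = new ArrayList<>();
      private final List<String> notMet = new ArrayList<>();

      // Pairs a human-readable condition name with the boolean actually tested.
      public void check(String description, boolean value) {
        (value ? met : notMet).add(description + " IS " + value);
      }

      public static void main(String[] args) {
        VectorizationConditions c = new VectorizationConditions();
        boolean reduceEnabled = true; // hive.vectorized.execution.reduce.enabled
        String engine = "mr";         // as in these MR-based .q.out files
        c.check("hive.vectorized.execution.reduce.enabled", reduceEnabled);
        c.check("hive.execution.engine " + engine + " IN [tez, spark]",
            engine.equals("tez") || engine.equals("spark"));
        System.out.println("enableConditionsMet: " + c.met);
        System.out.println("enableConditionsNotMet: " + c.notMet);
      }
    }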
diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
index 03c5ee4..b8928b1 100644
--- ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
+++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
@@ -63,7 +63,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 POSTHOOK: Output: default@alltypesorc_wrong
 POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE []
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
   to_unix_timestamp(ctimestamp1) AS c1,
   year(ctimestamp1),
   month(ctimestamp1),
@@ -76,7 +76,7 @@ PREHOOK: query: EXPLAIN SELECT
 FROM alltypesorc_string
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
   to_unix_timestamp(ctimestamp1) AS c1,
   year(ctimestamp1),
   month(ctimestamp1),
@@ -89,6 +89,10 @@ POSTHOOK: query: EXPLAIN SELECT
 FROM alltypesorc_string
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -100,16 +104,41 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc_string
             Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Select Operator
               expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10]
+                  selectExpressions: VectorUDFUnixTimeStampTimestamp(col 0) -> 2:long, VectorUDFYearTimestamp(col 0, field YEAR) -> 3:long, VectorUDFMonthTimestamp(col 0, field MONTH) -> 4:long, VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 5:long, VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 6:long, VectorUDFWeekOfYearTimestamp(col 0, field WEEK_OF_YEAR) -> 7:long, VectorUDFHourTimestamp(col 0, field HOUR_OF_DAY) -> 8:long, VectorUDFMinuteTimestamp(col 0, field MINUTE) -> 9:long, VectorUDFSecondTimestamp(col 0, field SECOND) -> 10:long
               Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: bigint)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                 Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int)
@@ -199,7 +228,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL
 NULL NULL NULL NULL NULL NULL NULL NULL NULL
 NULL NULL NULL NULL NULL NULL NULL NULL NULL
 NULL NULL NULL NULL NULL NULL NULL NULL NULL
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
   to_unix_timestamp(stimestamp1) AS c1,
   year(stimestamp1),
   month(stimestamp1),
@@ -212,7 +241,7 @@ PREHOOK: query: EXPLAIN SELECT
 FROM alltypesorc_string
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
   to_unix_timestamp(stimestamp1) AS c1,
   year(stimestamp1),
   month(stimestamp1),
@@ -225,6 +254,10 @@ POSTHOOK: query: EXPLAIN SELECT
 FROM alltypesorc_string
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -236,16 +269,41 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc_string
             Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Select Operator
               expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10]
+                  selectExpressions: VectorUDFUnixTimeStampString(col 1) -> 2:long, VectorUDFYearString(col 1, fieldStart 0, fieldLength 4) -> 3:long, VectorUDFMonthString(col 1, fieldStart 5, fieldLength 2) -> 4:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 5:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 6:long, VectorUDFWeekOfYearString(col 1) -> 7:long, VectorUDFHourString(col 1, fieldStart 11, fieldLength 2) -> 8:long, VectorUDFMinuteString(col 1, fieldStart 14, fieldLength 2) -> 9:long, VectorUDFSecondString(col 1, fieldStart 17, fieldLength 2) -> 10:long
               Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: bigint)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                 Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int)
@@ -335,7 +393,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL
 NULL NULL NULL NULL NULL NULL NULL NULL NULL
 NULL NULL NULL NULL NULL NULL NULL NULL NULL
 NULL NULL NULL NULL NULL NULL NULL NULL NULL
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
   to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1,
   year(ctimestamp1) = year(stimestamp1),
   month(ctimestamp1) = month(stimestamp1),
@@ -348,7 +406,7 @@ PREHOOK: query: EXPLAIN SELECT
 FROM alltypesorc_string
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
   to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1,
   year(ctimestamp1) = year(stimestamp1),
   month(ctimestamp1) = month(stimestamp1),
@@ -361,6 +419,10 @@ POSTHOOK: query: EXPLAIN SELECT
 FROM alltypesorc_string
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -372,16 +434,41 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc_string
             Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Select Operator
               expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(stimestamp1)) (type: boolean), (month(ctimestamp1) = month(stimestamp1)) (type: boolean), (day(ctimestamp1) = day(stimestamp1)) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(stimestamp1)) (type: boolean), (hour(ctimestamp1) = hour(stimestamp1)) (type: boolean), (minute(ctimestamp1) = minute(stimestamp1)) (type: boolean), (second(ctimestamp1) = second(stimestamp1)) (type: boolean)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [4, 5, 6, 7, 8, 9, 10, 11, 12]
+                  selectExpressions: LongColEqualLongColumn(col 2, col 3)(children: VectorUDFUnixTimeStampTimestamp(col 0) -> 2:long, VectorUDFUnixTimeStampString(col 1) -> 3:long) -> 4:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFYearTimestamp(col 0, field YEAR) -> 2:long, VectorUDFYearString(col 1, fieldStart 0, fieldLength 4) -> 3:long) -> 5:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMonthTimestamp(col 0, field MONTH) -> 2:long, VectorUDFMonthString(col 1, fieldStart 5, fieldLength 2) -> 3:long) -> 6:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 3:long) -> 7:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 3:long) -> 8:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFWeekOfYearTimestamp(col 0, field WEEK_OF_YEAR) -> 2:long, VectorUDFWeekOfYearString(col 1) -> 3:long) -> 9:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFHourTimestamp(col 0, field HOUR_OF_DAY) -> 2:long, VectorUDFHourString(col 1, fieldStart 11, fieldLength 2) -> 3:long) -> 10:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMinuteTimestamp(col 0, field MINUTE) -> 2:long, VectorUDFMinuteString(col 1, fieldStart 14, fieldLength 2) -> 3:long) -> 11:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFSecondTimestamp(col 0, field SECOND) -> 2:long, VectorUDFSecondString(col 1, fieldStart 17, fieldLength 2) -> 3:long) -> 12:long
               Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: boolean)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                 Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean)
@@ -471,7 +558,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL
 NULL NULL NULL NULL NULL NULL NULL NULL NULL
 NULL NULL NULL NULL NULL NULL NULL NULL NULL
 NULL NULL NULL NULL NULL NULL NULL NULL NULL
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
   to_unix_timestamp(stimestamp1) AS c1,
   year(stimestamp1),
   month(stimestamp1),
@@ -484,7 +571,7 @@ PREHOOK: query: EXPLAIN SELECT
 FROM alltypesorc_wrong
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
   to_unix_timestamp(stimestamp1) AS c1,
   year(stimestamp1),
   month(stimestamp1),
@@ -497,6 +584,10 @@ POSTHOOK: query: EXPLAIN SELECT
 FROM alltypesorc_wrong
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -508,16 +599,41 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc_wrong
             Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
             Select Operator
               expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9]
+                  selectExpressions: VectorUDFUnixTimeStampString(col 0) -> 1:long, VectorUDFYearString(col 0, fieldStart 0, fieldLength 4) -> 2:long, VectorUDFMonthString(col 0, fieldStart 5, fieldLength 2) -> 3:long, VectorUDFDayOfMonthString(col 0, fieldStart 8, fieldLength 2) -> 4:long, VectorUDFDayOfMonthString(col 0, fieldStart 8, fieldLength 2) -> 5:long, VectorUDFWeekOfYearString(col 0) -> 6:long, VectorUDFHourString(col 0, fieldStart 11, fieldLength 2) -> 7:long, VectorUDFMinuteString(col 0, fieldStart 14, fieldLength 2) -> 8:long, VectorUDFSecondString(col 0, fieldStart 17, fieldLength 2) -> 9:long
              Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: _col0 (type: bigint)
                sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE
                value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int)
@@ -570,20 +686,24 @@ POSTHOOK: Input: default@alltypesorc_wrong
 NULL NULL NULL NULL NULL NULL NULL NULL NULL
 NULL NULL NULL NULL NULL NULL NULL NULL NULL
 NULL NULL NULL NULL NULL NULL NULL NULL NULL
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
   min(ctimestamp1),
   max(ctimestamp1),
   count(ctimestamp1),
   count(*)
 FROM alltypesorc_string
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
   min(ctimestamp1),
   max(ctimestamp1),
   count(ctimestamp1),
   count(*)
 FROM alltypesorc_string
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -595,23 +715,57 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc_string
             Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Select Operator
               expressions: ctimestamp1 (type: timestamp)
               outputColumnNames: ctimestamp1
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [0]
               Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count()
+                Group By Vectorization:
+                    aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 0) -> timestamp, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0, 1, 2, 3]
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   sort order:
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                   Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint), _col3 (type: bigint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
+          Group By Vectorization:
+              vectorOutput: false
+              native: false
+              projectedOutputColumns: null
           mode: mergepartial
          outputColumnNames: _col0, _col1, _col2, _col3
          Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
@@ -648,14 +802,18 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc_string
 #### A masked pattern was here ####
 NULL NULL 0 40
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
   round(sum(ctimestamp1), 3)
 FROM alltypesorc_string
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
   round(sum(ctimestamp1), 3)
 FROM alltypesorc_string
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -673,6 +831,10 @@ STAGE PLANS:
             Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
             Group By Operator
               aggregations: sum(ctimestamp1)
+              Group By Vectorization:
+                  vectorOutput: false
+                  native: false
+                  projectedOutputColumns: null
               mode: hash
               outputColumnNames: _col0
               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -680,9 +842,23 @@ STAGE PLANS:
               sort order:
               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col0 (type: double)
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Aggregation Function expression for GROUPBY operator: Vectorization of aggreation should have succeeded org.apache.hadoop.hive.ql.metadata.HiveException: Vector aggregate not implemented: "sum" for type: "TIMESTAMP (UDAF evaluator mode = PARTIAL1)
+          vectorized: false
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: sum(VALUE._col0)
+          Group By Vectorization:
+              vectorOutput: false
+              native: false
+              projectedOutputColumns: null
           mode: mergepartial
           outputColumnNames: _col0
           Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -717,7 +893,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc_string
 #### A masked pattern was here ####
 NULL
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
   round(avg(ctimestamp1), 0),
   variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19,
   var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19,
@@ -728,7 +904,7 @@ PREHOOK: query: EXPLAIN SELECT
   round(stddev_samp(ctimestamp1), 3)
 FROM alltypesorc_string
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
   round(avg(ctimestamp1), 0),
   variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19,
   var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19,
@@ -739,6 +915,10 @@ POSTHOOK: query: EXPLAIN SELECT
   round(stddev_samp(ctimestamp1), 3)
 FROM alltypesorc_string
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -750,12 +930,26 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc_string
             Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Select Operator
               expressions: ctimestamp1 (type: timestamp)
              outputColumnNames: ctimestamp1
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [0]
              Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1)
+                Group By Vectorization:
+                    aggregators: VectorUDAFAvgTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarSampTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdSampTimestamp(col 0) -> struct
+                    className: VectorGroupByOperator
+                    vectorOutput: false
+                    native: false
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7]
+                    vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false
                mode: hash
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
                Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
@@ -764,9 +958,25 @@ STAGE PLANS:
                  Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: false
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
+          Group By Vectorization:
+              vectorOutput: false
+              native: false
+              projectedOutputColumns: null
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
           Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
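A detail worth noting in the timestamp plans above: the string-column variants carry fieldStart/fieldLength arguments (year at 0/4, month at 5/2, day at 8/2, hour at 11/2, minute at 14/2, second at 17/2), i.e. they read fields of a well-formed "yyyy-MM-dd HH:mm:ss" string by fixed byte offset rather than fully parsing it. A minimal standalone sketch of that trick follows; it is illustrative only, and the real Hive expressions presumably also handle malformed input, since the alltypesorc_wrong rows above all come out NULL:

    import java.nio.charset.StandardCharsets;

    public class FixedOffsetField {
      // Reads fieldLength digits starting at a fixed byte offset of the string.
      static long parseField(byte[] bytes, int fieldStart, int fieldLength) {
        long v = 0;
        for (int i = fieldStart; i < fieldStart + fieldLength; i++) {
          byte b = bytes[i];
          if (b < '0' || b > '9') {
            throw new IllegalArgumentException("not a digit at offset " + i);
          }
          v = v * 10 + (b - '0');
        }
        return v;
      }

      public static void main(String[] args) {
        byte[] s = "2031-09-09 21:21:21".getBytes(StandardCharsets.UTF_8);
        System.out.println(parseField(s, 0, 4));  // year   -> 2031
        System.out.println(parseField(s, 8, 2));  // day    -> 9
        System.out.println(parseField(s, 11, 2)); // hour   -> 21
      }
    }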
diff --git ql/src/test/results/clientpositive/vectorized_timestamp_ints_casts.q.out ql/src/test/results/clientpositive/vectorized_timestamp_ints_casts.q.out
index 0ecb226..5608390 100644
--- ql/src/test/results/clientpositive/vectorized_timestamp_ints_casts.q.out
+++ ql/src/test/results/clientpositive/vectorized_timestamp_ints_casts.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   cast (ctinyint as timestamp)
@@ -16,7 +16,7 @@ from alltypesorc
 where cbigint % 250 = 0
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   cast (ctinyint as timestamp)
@@ -34,6 +34,10 @@ from alltypesorc
 where cbigint % 250 = 0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -45,21 +49,44 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterLongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 3, val 250) -> 12:long) -> boolean
               predicate: ((cbigint % 250) = 0) (type: boolean)
               Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [14, 16, 18, 20, 21, 22, 24, 26, 8, 27, 29]
+                    selectExpressions: CastMillisecondsLongToTimestamp(col 0) -> 14:timestamp, CastMillisecondsLongToTimestamp(col 1) -> 16:timestamp, CastMillisecondsLongToTimestamp(col 2) -> 18:timestamp, CastMillisecondsLongToTimestamp(col 3) -> 20:timestamp, CastDoubleToTimestamp(col 4) -> 21:timestamp, CastDoubleToTimestamp(col 5) -> 22:timestamp, CastMillisecondsLongToTimestamp(col 10) -> 24:timestamp, CastMillisecondsLongToTimestamp(col 12)(children: LongColMultiplyLongScalar(col 3, val 0) -> 12:long) -> 26:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 27:timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6, start 0, length 1) -> 28:string) -> 29:timestamp
                 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: true
+          vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -131,7 +158,7 @@ POSTHOOK: Input: default@alltypesorc
 1969-12-31 15:59:59.95 1969-12-31 15:59:52.804 NULL 1969-12-19 17:33:32.75 1969-12-31 15:59:10 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 15:59:54.733 NULL NULL
 1969-12-31 16:00:00.011 NULL 1969-12-30 22:03:04.018 1970-01-21 12:50:53.75 1969-12-31 16:00:11 NULL 1969-12-31 16:00:00.001 1969-12-31 16:00:00 1969-12-31 16:00:02.351 NULL NULL
 1969-12-31 16:00:00.011 NULL 1969-12-27 18:49:09.583 1970-01-14 22:35:27 1969-12-31 16:00:11 NULL 1969-12-31 16:00:00.001 1969-12-31 16:00:00 1969-12-31 16:00:02.351 NULL NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   cast (ctinyint as timestamp)
@@ -149,7 +176,7 @@ from alltypesorc
 where cbigint % 250 = 0
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   cast (ctinyint as timestamp)
@@ -167,6 +194,10 @@ from alltypesorc
 where cbigint % 250 = 0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -178,21 +209,44 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterLongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 3, val 250) -> 12:long) -> boolean
               predicate: ((cbigint % 250) = 0) (type: boolean)
               Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19, 20, 8, 21, 23]
+                    selectExpressions: CastLongToTimestamp(col 0) -> 13:timestamp, CastLongToTimestamp(col 1) -> 14:timestamp, CastLongToTimestamp(col 2) -> 15:timestamp, CastLongToTimestamp(col 3) -> 16:timestamp, CastDoubleToTimestamp(col 4) -> 17:timestamp, CastDoubleToTimestamp(col 5) -> 18:timestamp, CastLongToTimestamp(col 10) -> 19:timestamp, CastLongToTimestamp(col 12)(children: LongColMultiplyLongScalar(col 3, val 0) -> 12:long) -> 20:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 21:timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6, start 0, length 1) -> 22:string) -> 23:timestamp
                 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: true
+          vectorized: true
 
   Stage: Stage-0
     Fetch Operator
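The two plans in this last file differ only in the cast family chosen: CastMillisecondsLongToTimestamp in the first query and CastLongToTimestamp in the second, i.e. the same integer is interpreted once as milliseconds and once as seconds since the epoch. A small illustration with java.sql.Timestamp (the sample value is invented, and the printed wall-clock strings depend on the JVM time zone; the 1969-12-31 15:59:xx result rows above suggest a Pacific-time test environment):

    import java.sql.Timestamp;

    public class LongToTimestampCasts {
      public static void main(String[] args) {
        long v = -30L; // e.g. a small ctinyint value
        Timestamp asMillis = new Timestamp(v);          // 30 ms before the epoch
        Timestamp asSeconds = new Timestamp(v * 1000L); // 30 s before the epoch
        System.out.println("milliseconds interpretation: " + asMillis);
        System.out.println("seconds interpretation:      " + asSeconds);
      }
    }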